annotate src/fftw-3.3.5/rdft/scalar/r2cb/hc2cbdft_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:52:04 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft_32 -include hc2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 498 FP additions, 260 FP multiplications,
Chris@42 32 * (or, 300 additions, 62 multiplications, 198 fused multiply/add),
Chris@42 33 * 165 stack variables, 7 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cb.h"
Chris@42 36
/*
 * hc2cbdft_32 (HAVE_FMA variant): genfft-generated size-32 codelet.  Per the
 * generator command line recorded above, it was produced with
 * "-sign 1 -n 32 -dif".
 *
 * For every m in [mb, me) the loop body
 *   - loads 16 values from each of Rp, Ip, Rm and Im, at offsets
 *     WS(rs, 0) .. WS(rs, 15) (64 loads in total),
 *   - combines them using the seven constants declared with DK() and the
 *     62 twiddle values W[0] .. W[61],
 *   - stores 16 results back into each of the same four arrays (64 stores).
 * All loads of an iteration are issued before its first store.
 *
 * Parameters:
 *   Rp, Ip  arrays read and overwritten; advanced by +ms after each iteration.
 *   Rm, Im  arrays read and overwritten; advanced by -ms after each iteration.
 *   W       twiddle table; offset by (mb - 1) * 62 before the first
 *           iteration, then advanced by 62 after each iteration.
 *   rs      stride handed to WS() to address the 16 elements of each array.
 *   mb, me  first (inclusive) and last (exclusive) value of the loop index m.
 *   ms      pointer step applied between iterations.
 *
 * NOTE(review): the name suggests a backward half-complex-to-complex step
 * computed via a DFT -- confirm against the FFTW rdft documentation.
 */
Chris@42 37 static void hc2cbdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
Chris@42 40 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
Chris@42 41 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
Chris@42 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* tan(pi/16) */
Chris@42 43 DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* tan(3*pi/16) */
Chris@42 44 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
Chris@42 45 DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* sqrt(2) - 1 = tan(pi/8) */
Chris@42 46 {
Chris@42 47 INT m;
Chris@42 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 49 E T8e, T8h, T7S, T8l, T8f, T84, T8c, T8k, T8g, T86, T82, T8m, T8i;
Chris@42 50 {
Chris@42 51 E T4B, T3h, T3K, Tv, T8Y, T6T, T8L, T7i, T8X, T7f, T4Y, T1G, T4K, T1j, T4X;
Chris@42 52 E T2M, T8C, T6d, T8o, T66, T8K, T6M, T4L, T2P, T4C, T3o, T5q, T4q, T8p, T6C;
Chris@42 53 E T8B, T6z, T72, T2u, T75, T10, T3P, T3a, T3L, T4t, T4E, T8F, T8t, T4F, T4w;
Chris@42 54 E T8E, T8w, T6E, T6l, T6F, T6s, T76, T4P, T51, T2R, T28, T8P, T90, T7k, T71;
Chris@42 55 E T2p, T4R, T2x, T73, T6x, T6y;
Chris@42 56 {
Chris@42 57 E T3l, T16, T3m, T2H, T2E, T13, T64, T7, T3i, T2J, T1c, T3j, T1h, T2K, Te;
Chris@42 58 E T1z, T6R, T6a, Tt, T3g, T6b, T1E, T6Q, Tj, T1p, Ti, T3b, T1n, Tk, T1q;
Chris@42 59 E T1r;
Chris@42 60 {
Chris@42 61 E T1, T2, T4, T5;
Chris@42 62 {
Chris@42 63 E T14, T15, T2F, T2G;
Chris@42 64 T14 = Ip[0]; /* first of the 64 input loads of this iteration */
Chris@42 65 T15 = Im[WS(rs, 15)];
Chris@42 66 T2F = Ip[WS(rs, 8)];
Chris@42 67 T2G = Im[WS(rs, 7)];
Chris@42 68 T1 = Rp[0];
Chris@42 69 T3l = T14 - T15;
Chris@42 70 T16 = T14 + T15;
Chris@42 71 T3m = T2F - T2G;
Chris@42 72 T2H = T2F + T2G;
Chris@42 73 T2 = Rm[WS(rs, 15)];
Chris@42 74 T4 = Rp[WS(rs, 8)];
Chris@42 75 T5 = Rm[WS(rs, 7)];
Chris@42 76 }
Chris@42 77 {
Chris@42 78 E T1b, T1e, T18, Ta, T1f, Tb, Tc, T8, T9, T1g, T1d, Td;
Chris@42 79 {
Chris@42 80 E T19, T3, T6, T1a;
Chris@42 81 T19 = Ip[WS(rs, 4)];
Chris@42 82 T2E = T1 - T2;
Chris@42 83 T3 = T1 + T2;
Chris@42 84 T13 = T4 - T5;
Chris@42 85 T6 = T4 + T5;
Chris@42 86 T1a = Im[WS(rs, 11)];
Chris@42 87 T8 = Rp[WS(rs, 4)];
Chris@42 88 T9 = Rm[WS(rs, 11)];
Chris@42 89 T64 = T3 - T6;
Chris@42 90 T7 = T3 + T6;
Chris@42 91 T1b = T19 + T1a;
Chris@42 92 T3i = T19 - T1a;
Chris@42 93 }
Chris@42 94 T1e = Im[WS(rs, 3)];
Chris@42 95 T18 = T8 - T9;
Chris@42 96 Ta = T8 + T9;
Chris@42 97 T1f = Ip[WS(rs, 12)];
Chris@42 98 Tb = Rm[WS(rs, 3)];
Chris@42 99 Tc = Rp[WS(rs, 12)];
Chris@42 100 T2J = T18 - T1b;
Chris@42 101 T1c = T18 + T1b;
Chris@42 102 T1g = T1e + T1f;
Chris@42 103 T3j = T1f - T1e;
Chris@42 104 T1d = Tb - Tc;
Chris@42 105 Td = Tb + Tc;
Chris@42 106 T1h = T1d + T1g;
Chris@42 107 T2K = T1d - T1g;
Chris@42 108 T6x = Ta - Td;
Chris@42 109 Te = Ta + Td;
Chris@42 110 }
Chris@42 111 {
Chris@42 112 E Tq, T1A, Tp, T3e, T1y, Tr, T1B, T1C;
Chris@42 113 {
Chris@42 114 E Tn, To, T1w, T1x;
Chris@42 115 Tn = Rm[WS(rs, 1)];
Chris@42 116 To = Rp[WS(rs, 14)];
Chris@42 117 T1w = Im[WS(rs, 1)];
Chris@42 118 T1x = Ip[WS(rs, 14)];
Chris@42 119 Tq = Rp[WS(rs, 6)];
Chris@42 120 T1A = Tn - To;
Chris@42 121 Tp = Tn + To;
Chris@42 122 T3e = T1x - T1w;
Chris@42 123 T1y = T1w + T1x;
Chris@42 124 Tr = Rm[WS(rs, 9)];
Chris@42 125 T1B = Ip[WS(rs, 6)];
Chris@42 126 T1C = Im[WS(rs, 9)];
Chris@42 127 }
Chris@42 128 {
Chris@42 129 E Tg, Th, T1l, T1m;
Chris@42 130 Tg = Rp[WS(rs, 2)];
Chris@42 131 {
Chris@42 132 E T1v, Ts, T3f, T1D;
Chris@42 133 T1v = Tq - Tr;
Chris@42 134 Ts = Tq + Tr;
Chris@42 135 T3f = T1B - T1C;
Chris@42 136 T1D = T1B + T1C;
Chris@42 137 T1z = T1v - T1y;
Chris@42 138 T6R = T1v + T1y;
Chris@42 139 T6a = Tp - Ts;
Chris@42 140 Tt = Tp + Ts;
Chris@42 141 T3g = T3e + T3f;
Chris@42 142 T6b = T3e - T3f;
Chris@42 143 T1E = T1A - T1D;
Chris@42 144 T6Q = T1A + T1D;
Chris@42 145 Th = Rm[WS(rs, 13)];
Chris@42 146 }
Chris@42 147 T1l = Ip[WS(rs, 2)];
Chris@42 148 T1m = Im[WS(rs, 13)];
Chris@42 149 Tj = Rp[WS(rs, 10)];
Chris@42 150 T1p = Tg - Th;
Chris@42 151 Ti = Tg + Th;
Chris@42 152 T3b = T1l - T1m;
Chris@42 153 T1n = T1l + T1m;
Chris@42 154 Tk = Rm[WS(rs, 5)];
Chris@42 155 T1q = Ip[WS(rs, 10)];
Chris@42 156 T1r = Im[WS(rs, 5)];
Chris@42 157 }
Chris@42 158 }
Chris@42 159 }
Chris@42 160 {
Chris@42 161 E T4o, T67, T68, T4p, T2I, T1i, T2N, T1u, T1F, T2O, T6K, T17;
Chris@42 162 {
Chris@42 163 E Tf, T1o, T1t, Tu, T7g, T6P, T6S, T7h, T7d, T7e;
Chris@42 164 {
Chris@42 165 E T6O, T6N, T1k, Tl;
Chris@42 166 T4o = T7 - Te;
Chris@42 167 Tf = T7 + Te;
Chris@42 168 T1k = Tj - Tk;
Chris@42 169 Tl = Tj + Tk;
Chris@42 170 {
Chris@42 171 E T3c, T1s, Tm, T3d;
Chris@42 172 T3c = T1q - T1r;
Chris@42 173 T1s = T1q + T1r;
Chris@42 174 T1o = T1k + T1n;
Chris@42 175 T6O = T1n - T1k;
Chris@42 176 T67 = Ti - Tl;
Chris@42 177 Tm = Ti + Tl;
Chris@42 178 T3d = T3b + T3c;
Chris@42 179 T68 = T3b - T3c;
Chris@42 180 T1t = T1p - T1s;
Chris@42 181 T6N = T1p + T1s;
Chris@42 182 T4B = Tm - Tt;
Chris@42 183 Tu = Tm + Tt;
Chris@42 184 T4p = T3g - T3d;
Chris@42 185 T3h = T3d + T3g;
Chris@42 186 }
Chris@42 187 T7g = FNMS(KP414213562, T6N, T6O);
Chris@42 188 T6P = FMA(KP414213562, T6O, T6N);
Chris@42 189 T6S = FMA(KP414213562, T6R, T6Q);
Chris@42 190 T7h = FNMS(KP414213562, T6Q, T6R);
Chris@42 191 }
Chris@42 192 T3K = Tf - Tu;
Chris@42 193 Tv = Tf + Tu;
Chris@42 194 T8Y = T6P + T6S;
Chris@42 195 T6T = T6P - T6S;
Chris@42 196 T2I = T2E - T2H;
Chris@42 197 T7d = T2E + T2H;
Chris@42 198 T7e = T1c + T1h;
Chris@42 199 T1i = T1c - T1h;
Chris@42 200 T2N = FNMS(KP414213562, T1o, T1t);
Chris@42 201 T1u = FMA(KP414213562, T1t, T1o);
Chris@42 202 T8L = T7h - T7g;
Chris@42 203 T7i = T7g + T7h;
Chris@42 204 T8X = FMA(KP707106781, T7e, T7d);
Chris@42 205 T7f = FNMS(KP707106781, T7e, T7d);
Chris@42 206 T1F = FNMS(KP414213562, T1E, T1z);
Chris@42 207 T2O = FMA(KP414213562, T1z, T1E);
Chris@42 208 T6K = T16 - T13;
Chris@42 209 T17 = T13 + T16;
Chris@42 210 }
Chris@42 211 {
Chris@42 212 E T6L, T6A, T6B, T65, T3k, T2L, T69, T6c, T3n;
Chris@42 213 T4Y = T1F - T1u;
Chris@42 214 T1G = T1u + T1F;
Chris@42 215 T4K = FNMS(KP707106781, T1i, T17);
Chris@42 216 T1j = FMA(KP707106781, T1i, T17);
Chris@42 217 T2L = T2J + T2K;
Chris@42 218 T6L = T2J - T2K;
Chris@42 219 T6A = T67 + T68;
Chris@42 220 T69 = T67 - T68;
Chris@42 221 T6c = T6a + T6b;
Chris@42 222 T6B = T6b - T6a;
Chris@42 223 T4X = FNMS(KP707106781, T2L, T2I);
Chris@42 224 T2M = FMA(KP707106781, T2L, T2I);
Chris@42 225 T8C = T69 - T6c;
Chris@42 226 T6d = T69 + T6c;
Chris@42 227 T65 = T3j - T3i;
Chris@42 228 T3k = T3i + T3j;
Chris@42 229 T8o = T64 - T65;
Chris@42 230 T66 = T64 + T65;
Chris@42 231 T8K = FNMS(KP707106781, T6L, T6K);
Chris@42 232 T6M = FMA(KP707106781, T6L, T6K);
Chris@42 233 T3n = T3l + T3m;
Chris@42 234 T6y = T3l - T3m;
Chris@42 235 T4L = T2N - T2O;
Chris@42 236 T2P = T2N + T2O;
Chris@42 237 T4C = T3n - T3k;
Chris@42 238 T3o = T3k + T3n;
Chris@42 239 T5q = T4o - T4p;
Chris@42 240 T4q = T4o + T4p;
Chris@42 241 T8p = T6B - T6A;
Chris@42 242 T6C = T6A + T6B;
Chris@42 243 }
Chris@42 244 }
Chris@42 245 }
Chris@42 246 {
Chris@42 247 E T1M, T6V, T6f, TC, T31, T6j, T23, T6Y, T2v, T2i, TY, T6p, T6n, T35, T2n;
Chris@42 248 E T2w, T24, T1R, TJ, T6i, T6g, T2Y, T1W, T25, T2q, TN, T2r, T36, T2c, T29;
Chris@42 249 E TQ, T2s;
Chris@42 250 {
Chris@42 251 E TU, T2k, T33, T2j, TX, T2l, T2m, T34;
Chris@42 252 {
Chris@42 253 E T1Z, Ty, T20, T2Z, T1L, T1I, TB, T21, T2e, T2h;
Chris@42 254 {
Chris@42 255 E T1J, T1K, Tw, Tx, Tz, TA;
Chris@42 256 Tw = Rp[WS(rs, 1)];
Chris@42 257 Tx = Rm[WS(rs, 14)];
Chris@42 258 T1J = Ip[WS(rs, 1)];
Chris@42 259 T8B = T6y - T6x;
Chris@42 260 T6z = T6x + T6y;
Chris@42 261 T1Z = Tw - Tx;
Chris@42 262 Ty = Tw + Tx;
Chris@42 263 T1K = Im[WS(rs, 14)];
Chris@42 264 Tz = Rp[WS(rs, 9)];
Chris@42 265 TA = Rm[WS(rs, 6)];
Chris@42 266 T20 = Ip[WS(rs, 9)];
Chris@42 267 T2Z = T1J - T1K;
Chris@42 268 T1L = T1J + T1K;
Chris@42 269 T1I = Tz - TA;
Chris@42 270 TB = Tz + TA;
Chris@42 271 T21 = Im[WS(rs, 6)];
Chris@42 272 }
Chris@42 273 {
Chris@42 274 E T2f, T2g, TV, TW;
Chris@42 275 {
Chris@42 276 E TS, T30, T22, TT;
Chris@42 277 TS = Rp[WS(rs, 3)];
Chris@42 278 T1M = T1I + T1L;
Chris@42 279 T6V = T1L - T1I;
Chris@42 280 T6f = Ty - TB;
Chris@42 281 TC = Ty + TB;
Chris@42 282 T30 = T20 - T21;
Chris@42 283 T22 = T20 + T21;
Chris@42 284 TT = Rm[WS(rs, 12)];
Chris@42 285 T2f = Ip[WS(rs, 3)];
Chris@42 286 T31 = T2Z + T30;
Chris@42 287 T6j = T2Z - T30;
Chris@42 288 T23 = T1Z - T22;
Chris@42 289 T6Y = T1Z + T22;
Chris@42 290 T2e = TS - TT;
Chris@42 291 TU = TS + TT;
Chris@42 292 T2g = Im[WS(rs, 12)];
Chris@42 293 }
Chris@42 294 TV = Rm[WS(rs, 4)];
Chris@42 295 TW = Rp[WS(rs, 11)];
Chris@42 296 T2k = Im[WS(rs, 4)];
Chris@42 297 T33 = T2f - T2g;
Chris@42 298 T2h = T2f + T2g;
Chris@42 299 T2j = TV - TW;
Chris@42 300 TX = TV + TW;
Chris@42 301 T2l = Ip[WS(rs, 11)];
Chris@42 302 }
Chris@42 303 T2v = T2e - T2h;
Chris@42 304 T2i = T2e + T2h;
Chris@42 305 }
Chris@42 306 TY = TU + TX;
Chris@42 307 T6p = TU - TX;
Chris@42 308 T2m = T2k + T2l;
Chris@42 309 T34 = T2l - T2k;
Chris@42 310 {
Chris@42 311 E TF, T1T, T2W, T1S, TI, T1U, T1N, T1Q, T1V, T2X;
Chris@42 312 {
Chris@42 313 E T1O, T1P, TD, TE, TG, TH;
Chris@42 314 TD = Rp[WS(rs, 5)];
Chris@42 315 TE = Rm[WS(rs, 10)];
Chris@42 316 T6n = T34 - T33;
Chris@42 317 T35 = T33 + T34;
Chris@42 318 T2n = T2j + T2m;
Chris@42 319 T2w = T2j - T2m;
Chris@42 320 T1N = TD - TE;
Chris@42 321 TF = TD + TE;
Chris@42 322 T1O = Ip[WS(rs, 5)];
Chris@42 323 T1P = Im[WS(rs, 10)];
Chris@42 324 TG = Rm[WS(rs, 2)];
Chris@42 325 TH = Rp[WS(rs, 13)];
Chris@42 326 T1T = Im[WS(rs, 2)];
Chris@42 327 T2W = T1O - T1P;
Chris@42 328 T1Q = T1O + T1P;
Chris@42 329 T1S = TG - TH;
Chris@42 330 TI = TG + TH;
Chris@42 331 T1U = Ip[WS(rs, 13)];
Chris@42 332 }
Chris@42 333 T24 = T1N - T1Q;
Chris@42 334 T1R = T1N + T1Q;
Chris@42 335 TJ = TF + TI;
Chris@42 336 T6i = TF - TI;
Chris@42 337 T1V = T1T + T1U;
Chris@42 338 T2X = T1U - T1T;
Chris@42 339 {
Chris@42 340 E T2a, T2b, TL, TM, TO, TP;
Chris@42 341 TL = Rm[0];
Chris@42 342 TM = Rp[WS(rs, 15)];
Chris@42 343 T6g = T2X - T2W;
Chris@42 344 T2Y = T2W + T2X;
Chris@42 345 T1W = T1S + T1V;
Chris@42 346 T25 = T1S - T1V;
Chris@42 347 T2q = TL - TM;
Chris@42 348 TN = TL + TM;
Chris@42 349 T2a = Im[0];
Chris@42 350 T2b = Ip[WS(rs, 15)];
Chris@42 351 TO = Rp[WS(rs, 7)];
Chris@42 352 TP = Rm[WS(rs, 8)];
Chris@42 353 T2r = Ip[WS(rs, 7)];
Chris@42 354 T36 = T2b - T2a;
Chris@42 355 T2c = T2a + T2b;
Chris@42 356 T29 = TO - TP;
Chris@42 357 TQ = TO + TP;
Chris@42 358 T2s = Im[WS(rs, 8)];
Chris@42 359 }
Chris@42 360 }
Chris@42 361 }
Chris@42 362 {
Chris@42 363 E T2d, T4u, T4v, T6r, T6o, T6k, T8u, T8v, T6h;
Chris@42 364 {
Chris@42 365 E T4r, T6m, T32, T4s, T6q, T39, T8r, T8s;
Chris@42 366 {
Chris@42 367 E TK, TR, T37, T2t, TZ, T38;
Chris@42 368 T4r = TC - TJ;
Chris@42 369 TK = TC + TJ;
Chris@42 370 T2d = T29 - T2c;
Chris@42 371 T72 = T29 + T2c;
Chris@42 372 T6m = TN - TQ;
Chris@42 373 TR = TN + TQ;
Chris@42 374 T37 = T2r - T2s;
Chris@42 375 T2t = T2r + T2s;
Chris@42 376 T32 = T2Y + T31;
Chris@42 377 T4s = T31 - T2Y;
Chris@42 378 T4u = TR - TY;
Chris@42 379 TZ = TR + TY;
Chris@42 380 T38 = T36 + T37;
Chris@42 381 T6q = T36 - T37;
Chris@42 382 T2u = T2q - T2t;
Chris@42 383 T75 = T2q + T2t;
Chris@42 384 T10 = TK + TZ;
Chris@42 385 T3P = TK - TZ;
Chris@42 386 T4v = T38 - T35;
Chris@42 387 T39 = T35 + T38;
Chris@42 388 }
Chris@42 389 T8r = T6q - T6p;
Chris@42 390 T6r = T6p + T6q;
Chris@42 391 T3a = T32 + T39;
Chris@42 392 T3L = T39 - T32;
Chris@42 393 T8s = T6m - T6n;
Chris@42 394 T6o = T6m + T6n;
Chris@42 395 T4t = T4r - T4s;
Chris@42 396 T4E = T4r + T4s;
Chris@42 397 T8F = FNMS(KP414213562, T8r, T8s);
Chris@42 398 T8t = FMA(KP414213562, T8s, T8r);
Chris@42 399 T6k = T6i + T6j;
Chris@42 400 T8u = T6j - T6i;
Chris@42 401 T8v = T6f - T6g;
Chris@42 402 T6h = T6f + T6g;
Chris@42 403 }
Chris@42 404 {
Chris@42 405 E T6Z, T1Y, T4O, T26, T6W, T1X, T2o, T4N, T27;
Chris@42 406 T4F = T4v - T4u;
Chris@42 407 T4w = T4u + T4v;
Chris@42 408 T8E = FMA(KP414213562, T8u, T8v);
Chris@42 409 T8w = FNMS(KP414213562, T8v, T8u);
Chris@42 410 T6Z = T1R + T1W;
Chris@42 411 T1X = T1R - T1W;
Chris@42 412 T6E = FMA(KP414213562, T6h, T6k);
Chris@42 413 T6l = FNMS(KP414213562, T6k, T6h);
Chris@42 414 T6F = FNMS(KP414213562, T6o, T6r);
Chris@42 415 T6s = FMA(KP414213562, T6r, T6o);
Chris@42 416 T1Y = FMA(KP707106781, T1X, T1M);
Chris@42 417 T4O = FNMS(KP707106781, T1X, T1M);
Chris@42 418 T26 = T24 + T25;
Chris@42 419 T6W = T25 - T24;
Chris@42 420 T76 = T2i + T2n;
Chris@42 421 T2o = T2i - T2n;
Chris@42 422 T4N = FNMS(KP707106781, T26, T23);
Chris@42 423 T27 = FMA(KP707106781, T26, T23);
Chris@42 424 {
Chris@42 425 E T8O, T6X, T8N, T70;
Chris@42 426 T8O = FMA(KP707106781, T6W, T6V);
Chris@42 427 T6X = FNMS(KP707106781, T6W, T6V);
Chris@42 428 T8N = FMA(KP707106781, T6Z, T6Y);
Chris@42 429 T70 = FNMS(KP707106781, T6Z, T6Y);
Chris@42 430 T4P = FMA(KP668178637, T4O, T4N);
Chris@42 431 T51 = FNMS(KP668178637, T4N, T4O);
Chris@42 432 T2R = FNMS(KP198912367, T1Y, T27);
Chris@42 433 T28 = FMA(KP198912367, T27, T1Y);
Chris@42 434 T8P = FMA(KP198912367, T8O, T8N);
Chris@42 435 T90 = FNMS(KP198912367, T8N, T8O);
Chris@42 436 T7k = FNMS(KP668178637, T6X, T70);
Chris@42 437 T71 = FMA(KP668178637, T70, T6X);
Chris@42 438 T2p = FMA(KP707106781, T2o, T2d);
Chris@42 439 T4R = FNMS(KP707106781, T2o, T2d);
Chris@42 440 }
Chris@42 441 T2x = T2v + T2w;
Chris@42 442 T73 = T2v - T2w;
Chris@42 443 }
Chris@42 444 }
Chris@42 445 }
Chris@42 446 {
Chris@42 447 E T8S, T91, T7l, T78, T5U, T5X, T5y, T61, T5V, T5K, T5S, T60, T5W, T5M, T5I;
Chris@42 448 {
Chris@42 449 E T4S, T50, T4e, T4h, T3S, T4l, T4f, T44, T4c, T4k, T4g, T46, T42;
Chris@42 450 {
Chris@42 451 E T3Q, T3U, T40, T3Z, T3V, T3A, T3D, T3H, T3B, T3y, T3G, T3C;
Chris@42 452 {
Chris@42 453 E T11, T3t, T3w, T3q, T3x, T3v, T3F, T12, T2B, T2U, T3z, T2C;
Chris@42 454 {
Chris@42 455 E T3u, T2S, T2z, T3p, T4Q, T2y;
Chris@42 456 T3u = Tv - T10;
Chris@42 457 T11 = Tv + T10;
Chris@42 458 T4Q = FNMS(KP707106781, T2x, T2u);
Chris@42 459 T2y = FMA(KP707106781, T2x, T2u);
Chris@42 460 {
Chris@42 461 E T8R, T74, T8Q, T77;
Chris@42 462 T8R = FMA(KP707106781, T73, T72);
Chris@42 463 T74 = FNMS(KP707106781, T73, T72);
Chris@42 464 T8Q = FMA(KP707106781, T76, T75);
Chris@42 465 T77 = FNMS(KP707106781, T76, T75);
Chris@42 466 T4S = FNMS(KP668178637, T4R, T4Q);
Chris@42 467 T50 = FMA(KP668178637, T4Q, T4R);
Chris@42 468 T2S = FMA(KP198912367, T2p, T2y);
Chris@42 469 T2z = FNMS(KP198912367, T2y, T2p);
Chris@42 470 T8S = FMA(KP198912367, T8R, T8Q);
Chris@42 471 T91 = FNMS(KP198912367, T8Q, T8R);
Chris@42 472 T7l = FNMS(KP668178637, T74, T77);
Chris@42 473 T78 = FMA(KP668178637, T77, T74);
Chris@42 474 T3Q = T3o - T3h;
Chris@42 475 T3p = T3h + T3o;
Chris@42 476 }
Chris@42 477 T3t = W[30];
Chris@42 478 T3w = W[31];
Chris@42 479 T3q = T3a + T3p;
Chris@42 480 T3x = T3p - T3a;
Chris@42 481 T3v = T3t * T3u;
Chris@42 482 T3F = T3w * T3u;
Chris@42 483 {
Chris@42 484 E T1H, T2A, T2Q, T2T;
Chris@42 485 T3U = FNMS(KP923879532, T1G, T1j);
Chris@42 486 T1H = FMA(KP923879532, T1G, T1j);
Chris@42 487 T2A = T28 + T2z;
Chris@42 488 T40 = T2z - T28;
Chris@42 489 T3Z = FNMS(KP923879532, T2P, T2M);
Chris@42 490 T2Q = FMA(KP923879532, T2P, T2M);
Chris@42 491 T2T = T2R + T2S;
Chris@42 492 T3V = T2R - T2S;
Chris@42 493 T12 = W[0];
Chris@42 494 T3A = FNMS(KP980785280, T2A, T1H);
Chris@42 495 T2B = FMA(KP980785280, T2A, T1H);
Chris@42 496 T3D = FNMS(KP980785280, T2T, T2Q);
Chris@42 497 T2U = FMA(KP980785280, T2T, T2Q);
Chris@42 498 T3z = W[32];
Chris@42 499 T2C = T12 * T2B;
Chris@42 500 }
Chris@42 501 }
Chris@42 502 {
Chris@42 503 E T2V, T3s, T2D, T3r;
Chris@42 504 T2D = W[1];
Chris@42 505 T3r = T12 * T2U;
Chris@42 506 T3H = T3z * T3D;
Chris@42 507 T3B = T3z * T3A;
Chris@42 508 T2V = FMA(T2D, T2U, T2C);
Chris@42 509 T3s = FNMS(T2D, T2B, T3r);
Chris@42 510 T3y = FNMS(T3w, T3x, T3v);
Chris@42 511 T3G = FMA(T3t, T3x, T3F);
Chris@42 512 Rm[0] = T11 + T2V; /* first store; every input load of this iteration has already been issued */
Chris@42 513 Rp[0] = T11 - T2V;
Chris@42 514 Im[0] = T3s - T3q;
Chris@42 515 Ip[0] = T3q + T3s;
Chris@42 516 T3C = W[33];
Chris@42 517 }
Chris@42 518 }
Chris@42 519 {
Chris@42 520 E T4b, T3R, T47, T4a, T3J, T49, T4j, T3O, T3N, T43, T3W, T3T, T41, T4d, T3X;
Chris@42 521 E T45, T3Y;
Chris@42 522 {
Chris@42 523 E T3M, T48, T3I, T3E;
Chris@42 524 T3M = T3K + T3L;
Chris@42 525 T48 = T3K - T3L;
Chris@42 526 T3I = FNMS(T3C, T3A, T3H);
Chris@42 527 T3E = FMA(T3C, T3D, T3B);
Chris@42 528 T4b = T3Q - T3P;
Chris@42 529 T3R = T3P + T3Q;
Chris@42 530 Im[WS(rs, 8)] = T3I - T3G;
Chris@42 531 Ip[WS(rs, 8)] = T3G + T3I;
Chris@42 532 Rm[WS(rs, 8)] = T3y + T3E;
Chris@42 533 Rp[WS(rs, 8)] = T3y - T3E;
Chris@42 534 T47 = W[46];
Chris@42 535 T4a = W[47];
Chris@42 536 T3J = W[14];
Chris@42 537 T49 = T47 * T48;
Chris@42 538 T4j = T4a * T48;
Chris@42 539 T3O = W[15];
Chris@42 540 T3N = T3J * T3M;
Chris@42 541 T43 = T3O * T3M;
Chris@42 542 T3W = FMA(KP980785280, T3V, T3U);
Chris@42 543 T4e = FNMS(KP980785280, T3V, T3U);
Chris@42 544 T3T = W[16];
Chris@42 545 T4h = FNMS(KP980785280, T40, T3Z);
Chris@42 546 T41 = FMA(KP980785280, T40, T3Z);
Chris@42 547 T4d = W[48];
Chris@42 548 T3X = T3T * T3W;
Chris@42 549 }
Chris@42 550 T3S = FNMS(T3O, T3R, T3N);
Chris@42 551 T45 = T3T * T41;
Chris@42 552 T4l = T4d * T4h;
Chris@42 553 T4f = T4d * T4e;
Chris@42 554 T44 = FMA(T3J, T3R, T43);
Chris@42 555 T3Y = W[17];
Chris@42 556 T4c = FNMS(T4a, T4b, T49);
Chris@42 557 T4k = FMA(T47, T4b, T4j);
Chris@42 558 T4g = W[49];
Chris@42 559 T46 = FNMS(T3Y, T3W, T45);
Chris@42 560 T42 = FMA(T3Y, T41, T3X);
Chris@42 561 }
Chris@42 562 }
Chris@42 563 {
Chris@42 564 E T5v, T5r, T5w, T5A, T5G, T5F, T5B, T5g, T5j, T4I, T5n, T5h, T56, T5e, T5m;
Chris@42 565 E T5i, T58, T54;
Chris@42 566 {
Chris@42 567 E T4n, T4A, T5d, T4H, T59, T5c, T55, T4z, T5b, T5l, T4J, T4U, T53, T5f, T4V;
Chris@42 568 E T57, T4W;
Chris@42 569 {
Chris@42 570 E T4D, T4G, T4m, T4i, T5a, T4y, T4x;
Chris@42 571 T5v = T4C - T4B;
Chris@42 572 T4D = T4B + T4C;
Chris@42 573 T4m = FNMS(T4g, T4e, T4l);
Chris@42 574 T4i = FMA(T4g, T4h, T4f);
Chris@42 575 Im[WS(rs, 4)] = T46 - T44;
Chris@42 576 Ip[WS(rs, 4)] = T44 + T46;
Chris@42 577 Rm[WS(rs, 4)] = T3S + T42;
Chris@42 578 Rp[WS(rs, 4)] = T3S - T42;
Chris@42 579 Im[WS(rs, 12)] = T4m - T4k;
Chris@42 580 Ip[WS(rs, 12)] = T4k + T4m;
Chris@42 581 Rm[WS(rs, 12)] = T4c + T4i;
Chris@42 582 Rp[WS(rs, 12)] = T4c - T4i;
Chris@42 583 T4G = T4E + T4F;
Chris@42 584 T5r = T4F - T4E;
Chris@42 585 T5w = T4t - T4w;
Chris@42 586 T4x = T4t + T4w;
Chris@42 587 T4n = W[6];
Chris@42 588 T4A = W[7];
Chris@42 589 T5d = FNMS(KP707106781, T4G, T4D);
Chris@42 590 T4H = FMA(KP707106781, T4G, T4D);
Chris@42 591 T5a = FNMS(KP707106781, T4x, T4q);
Chris@42 592 T4y = FMA(KP707106781, T4x, T4q);
Chris@42 593 T59 = W[38];
Chris@42 594 T5c = W[39];
Chris@42 595 {
Chris@42 596 E T4M, T4T, T4Z, T52;
Chris@42 597 T4M = FMA(KP923879532, T4L, T4K);
Chris@42 598 T5A = FNMS(KP923879532, T4L, T4K);
Chris@42 599 T55 = T4A * T4y;
Chris@42 600 T4z = T4n * T4y;
Chris@42 601 T5b = T59 * T5a;
Chris@42 602 T5l = T5c * T5a;
Chris@42 603 T5G = T4P + T4S;
Chris@42 604 T4T = T4P - T4S;
Chris@42 605 T4Z = FMA(KP923879532, T4Y, T4X);
Chris@42 606 T5F = FNMS(KP923879532, T4Y, T4X);
Chris@42 607 T5B = T51 + T50;
Chris@42 608 T52 = T50 - T51;
Chris@42 609 T4J = W[8];
Chris@42 610 T4U = FMA(KP831469612, T4T, T4M);
Chris@42 611 T5g = FNMS(KP831469612, T4T, T4M);
Chris@42 612 T53 = FMA(KP831469612, T52, T4Z);
Chris@42 613 T5j = FNMS(KP831469612, T52, T4Z);
Chris@42 614 T5f = W[40];
Chris@42 615 T4V = T4J * T4U;
Chris@42 616 }
Chris@42 617 }
Chris@42 618 T4I = FNMS(T4A, T4H, T4z);
Chris@42 619 T57 = T4J * T53;
Chris@42 620 T5n = T5f * T5j;
Chris@42 621 T5h = T5f * T5g;
Chris@42 622 T56 = FMA(T4n, T4H, T55);
Chris@42 623 T4W = W[9];
Chris@42 624 T5e = FNMS(T5c, T5d, T5b);
Chris@42 625 T5m = FMA(T59, T5d, T5l);
Chris@42 626 T5i = W[41];
Chris@42 627 T58 = FNMS(T4W, T4U, T57);
Chris@42 628 T54 = FMA(T4W, T53, T4V);
Chris@42 629 }
Chris@42 630 {
Chris@42 631 E T5p, T5u, T5x, T5R, T5N, T5Q, T5J, T5t, T5P, T5Z, T5z, T5C, T5H, T5T, T5D;
Chris@42 632 E T5L, T5E;
Chris@42 633 {
Chris@42 634 E T5o, T5k, T5s, T5O;
Chris@42 635 T5o = FNMS(T5i, T5g, T5n);
Chris@42 636 T5k = FMA(T5i, T5j, T5h);
Chris@42 637 Im[WS(rs, 2)] = T58 - T56;
Chris@42 638 Ip[WS(rs, 2)] = T56 + T58;
Chris@42 639 Rm[WS(rs, 2)] = T4I + T54;
Chris@42 640 Rp[WS(rs, 2)] = T4I - T54;
Chris@42 641 Im[WS(rs, 10)] = T5o - T5m;
Chris@42 642 Ip[WS(rs, 10)] = T5m + T5o;
Chris@42 643 Rm[WS(rs, 10)] = T5e + T5k;
Chris@42 644 Rp[WS(rs, 10)] = T5e - T5k;
Chris@42 645 T5p = W[22];
Chris@42 646 T5u = W[23];
Chris@42 647 T5x = FMA(KP707106781, T5w, T5v);
Chris@42 648 T5R = FNMS(KP707106781, T5w, T5v);
Chris@42 649 T5s = FMA(KP707106781, T5r, T5q);
Chris@42 650 T5O = FNMS(KP707106781, T5r, T5q);
Chris@42 651 T5N = W[54];
Chris@42 652 T5Q = W[55];
Chris@42 653 T5J = T5u * T5s;
Chris@42 654 T5t = T5p * T5s;
Chris@42 655 T5P = T5N * T5O;
Chris@42 656 T5Z = T5Q * T5O;
Chris@42 657 T5z = W[24];
Chris@42 658 T5U = FMA(KP831469612, T5B, T5A);
Chris@42 659 T5C = FNMS(KP831469612, T5B, T5A);
Chris@42 660 T5X = FMA(KP831469612, T5G, T5F);
Chris@42 661 T5H = FNMS(KP831469612, T5G, T5F);
Chris@42 662 T5T = W[56];
Chris@42 663 T5D = T5z * T5C;
Chris@42 664 }
Chris@42 665 T5y = FNMS(T5u, T5x, T5t);
Chris@42 666 T5L = T5z * T5H;
Chris@42 667 T61 = T5T * T5X;
Chris@42 668 T5V = T5T * T5U;
Chris@42 669 T5K = FMA(T5p, T5x, T5J);
Chris@42 670 T5E = W[25];
Chris@42 671 T5S = FNMS(T5Q, T5R, T5P);
Chris@42 672 T60 = FMA(T5N, T5R, T5Z);
Chris@42 673 T5W = W[57];
Chris@42 674 T5M = FNMS(T5E, T5C, T5L);
Chris@42 675 T5I = FMA(T5E, T5H, T5D);
Chris@42 676 }
Chris@42 677 }
Chris@42 678 }
Chris@42 679 {
Chris@42 680 E T7P, T7L, T7K, T7Q, T7U, T80, T7Z, T7V, T9v, T9r, T9q, T9w, T9A, T9G, T9F;
Chris@42 681 E T9B, T9g, T9j, T8I, T9n, T9h, T96, T9e, T9m, T9i, T98, T94;
Chris@42 682 {
Chris@42 683 E T7A, T7D, T6I, T7H, T7B, T7q, T7y, T7G, T7C, T7s, T7o;
Chris@42 684 {
Chris@42 685 E T63, T7x, T6H, T6w, T7t, T7w, T6v, T7p, T7v, T7F, T6J, T7a, T7n, T7z, T7b;
Chris@42 686 E T7r, T7c;
Chris@42 687 {
Chris@42 688 E T6D, T6G, T62, T5Y;
Chris@42 689 T7P = FNMS(KP707106781, T6C, T6z);
Chris@42 690 T6D = FMA(KP707106781, T6C, T6z);
Chris@42 691 T62 = FNMS(T5W, T5U, T61);
Chris@42 692 T5Y = FMA(T5W, T5X, T5V);
Chris@42 693 Im[WS(rs, 6)] = T5M - T5K;
Chris@42 694 Ip[WS(rs, 6)] = T5K + T5M;
Chris@42 695 Rm[WS(rs, 6)] = T5y + T5I;
Chris@42 696 Rp[WS(rs, 6)] = T5y - T5I;
Chris@42 697 Im[WS(rs, 14)] = T62 - T60;
Chris@42 698 Ip[WS(rs, 14)] = T60 + T62;
Chris@42 699 Rm[WS(rs, 14)] = T5S + T5Y;
Chris@42 700 Rp[WS(rs, 14)] = T5S - T5Y;
Chris@42 701 T6G = T6E + T6F;
Chris@42 702 T7L = T6F - T6E;
Chris@42 703 {
Chris@42 704 E T6e, T6t, T7u, T6u;
Chris@42 705 T7K = FNMS(KP707106781, T6d, T66);
Chris@42 706 T6e = FMA(KP707106781, T6d, T66);
Chris@42 707 T6t = T6l + T6s;
Chris@42 708 T7Q = T6l - T6s;
Chris@42 709 T63 = W[2];
Chris@42 710 T7x = FNMS(KP923879532, T6G, T6D);
Chris@42 711 T6H = FMA(KP923879532, T6G, T6D);
Chris@42 712 T7u = FNMS(KP923879532, T6t, T6e);
Chris@42 713 T6u = FMA(KP923879532, T6t, T6e);
Chris@42 714 T6w = W[3];
Chris@42 715 T7t = W[34];
Chris@42 716 T7w = W[35];
Chris@42 717 T6v = T63 * T6u;
Chris@42 718 T7p = T6w * T6u;
Chris@42 719 T7v = T7t * T7u;
Chris@42 720 T7F = T7w * T7u;
Chris@42 721 }
Chris@42 722 {
Chris@42 723 E T6U, T79, T7j, T7m;
Chris@42 724 T7U = FNMS(KP923879532, T6T, T6M);
Chris@42 725 T6U = FMA(KP923879532, T6T, T6M);
Chris@42 726 T79 = T71 - T78;
Chris@42 727 T80 = T71 + T78;
Chris@42 728 T7Z = FMA(KP923879532, T7i, T7f);
Chris@42 729 T7j = FNMS(KP923879532, T7i, T7f);
Chris@42 730 T7m = T7k + T7l;
Chris@42 731 T7V = T7k - T7l;
Chris@42 732 T6J = W[4];
Chris@42 733 T7A = FNMS(KP831469612, T79, T6U);
Chris@42 734 T7a = FMA(KP831469612, T79, T6U);
Chris@42 735 T7D = FNMS(KP831469612, T7m, T7j);
Chris@42 736 T7n = FMA(KP831469612, T7m, T7j);
Chris@42 737 T7z = W[36];
Chris@42 738 T7b = T6J * T7a;
Chris@42 739 }
Chris@42 740 }
Chris@42 741 T6I = FNMS(T6w, T6H, T6v);
Chris@42 742 T7r = T6J * T7n;
Chris@42 743 T7H = T7z * T7D;
Chris@42 744 T7B = T7z * T7A;
Chris@42 745 T7q = FMA(T63, T6H, T7p);
Chris@42 746 T7c = W[5];
Chris@42 747 T7y = FNMS(T7w, T7x, T7v);
Chris@42 748 T7G = FMA(T7t, T7x, T7F);
Chris@42 749 T7C = W[37];
Chris@42 750 T7s = FNMS(T7c, T7a, T7r);
Chris@42 751 T7o = FMA(T7c, T7n, T7b);
Chris@42 752 }
Chris@42 753 {
Chris@42 754 E T8n, T9d, T8H, T8A, T99, T9c, T8z, T95, T9b, T9l, T8J, T8U, T93, T9f, T8V;
Chris@42 755 E T97, T8W;
Chris@42 756 {
Chris@42 757 E T8D, T8G, T7I, T7E;
Chris@42 758 T9v = FNMS(KP707106781, T8C, T8B);
Chris@42 759 T8D = FMA(KP707106781, T8C, T8B);
Chris@42 760 T7I = FNMS(T7C, T7A, T7H);
Chris@42 761 T7E = FMA(T7C, T7D, T7B);
Chris@42 762 Im[WS(rs, 1)] = T7s - T7q;
Chris@42 763 Ip[WS(rs, 1)] = T7q + T7s;
Chris@42 764 Rm[WS(rs, 1)] = T6I + T7o;
Chris@42 765 Rp[WS(rs, 1)] = T6I - T7o;
Chris@42 766 Im[WS(rs, 9)] = T7I - T7G;
Chris@42 767 Ip[WS(rs, 9)] = T7G + T7I;
Chris@42 768 Rm[WS(rs, 9)] = T7y + T7E;
Chris@42 769 Rp[WS(rs, 9)] = T7y - T7E;
Chris@42 770 T8G = T8E - T8F;
Chris@42 771 T9r = T8E + T8F;
Chris@42 772 {
Chris@42 773 E T8q, T8x, T9a, T8y;
Chris@42 774 T9q = FNMS(KP707106781, T8p, T8o);
Chris@42 775 T8q = FMA(KP707106781, T8p, T8o);
Chris@42 776 T8x = T8t - T8w;
Chris@42 777 T9w = T8w + T8t;
Chris@42 778 T8n = W[10];
Chris@42 779 T9d = FNMS(KP923879532, T8G, T8D);
Chris@42 780 T8H = FMA(KP923879532, T8G, T8D);
Chris@42 781 T9a = FNMS(KP923879532, T8x, T8q);
Chris@42 782 T8y = FMA(KP923879532, T8x, T8q);
Chris@42 783 T8A = W[11];
Chris@42 784 T99 = W[42];
Chris@42 785 T9c = W[43];
Chris@42 786 T8z = T8n * T8y;
Chris@42 787 T95 = T8A * T8y;
Chris@42 788 T9b = T99 * T9a;
Chris@42 789 T9l = T9c * T9a;
Chris@42 790 }
Chris@42 791 {
Chris@42 792 E T8M, T8T, T8Z, T92;
Chris@42 793 T9A = FNMS(KP923879532, T8L, T8K);
Chris@42 794 T8M = FMA(KP923879532, T8L, T8K);
Chris@42 795 T8T = T8P - T8S;
Chris@42 796 T9G = T8P + T8S;
Chris@42 797 T9F = FMA(KP923879532, T8Y, T8X);
Chris@42 798 T8Z = FNMS(KP923879532, T8Y, T8X);
Chris@42 799 T92 = T90 + T91;
Chris@42 800 T9B = T91 - T90;
Chris@42 801 T8J = W[12];
Chris@42 802 T9g = FNMS(KP980785280, T8T, T8M);
Chris@42 803 T8U = FMA(KP980785280, T8T, T8M);
Chris@42 804 T9j = FMA(KP980785280, T92, T8Z);
Chris@42 805 T93 = FNMS(KP980785280, T92, T8Z);
Chris@42 806 T9f = W[44];
Chris@42 807 T8V = T8J * T8U;
Chris@42 808 }
Chris@42 809 }
Chris@42 810 T8I = FNMS(T8A, T8H, T8z);
Chris@42 811 T97 = T8J * T93;
Chris@42 812 T9n = T9f * T9j;
Chris@42 813 T9h = T9f * T9g;
Chris@42 814 T96 = FMA(T8n, T8H, T95);
Chris@42 815 T8W = W[13];
Chris@42 816 T9e = FNMS(T9c, T9d, T9b);
Chris@42 817 T9m = FMA(T99, T9d, T9l);
Chris@42 818 T9i = W[45];
Chris@42 819 T98 = FNMS(T8W, T8U, T97);
Chris@42 820 T94 = FMA(T8W, T93, T8V);
Chris@42 821 }
Chris@42 822 }
Chris@42 823 {
Chris@42 824 E T9U, T9X, T9y, Ta1, T9V, T9K, T9S, Ta0, T9W, T9M, T9I;
Chris@42 825 {
Chris@42 826 E T9p, T9R, T9x, T9u, T9N, T9Q, T9t, T9J, T9P, T9Z, T9z, T9C, T9H, T9T, T9D;
Chris@42 827 E T9L, T9E;
Chris@42 828 {
Chris@42 829 E T9o, T9k, T9O, T9s;
Chris@42 830 T9o = FNMS(T9i, T9g, T9n);
Chris@42 831 T9k = FMA(T9i, T9j, T9h);
Chris@42 832 Im[WS(rs, 3)] = T98 - T96;
Chris@42 833 Ip[WS(rs, 3)] = T96 + T98;
Chris@42 834 Rm[WS(rs, 3)] = T8I + T94;
Chris@42 835 Rp[WS(rs, 3)] = T8I - T94;
Chris@42 836 Im[WS(rs, 11)] = T9o - T9m;
Chris@42 837 Ip[WS(rs, 11)] = T9m + T9o;
Chris@42 838 Rm[WS(rs, 11)] = T9e + T9k;
Chris@42 839 Rp[WS(rs, 11)] = T9e - T9k;
Chris@42 840 T9p = W[26];
Chris@42 841 T9R = FMA(KP923879532, T9w, T9v);
Chris@42 842 T9x = FNMS(KP923879532, T9w, T9v);
Chris@42 843 T9O = FMA(KP923879532, T9r, T9q);
Chris@42 844 T9s = FNMS(KP923879532, T9r, T9q);
Chris@42 845 T9u = W[27];
Chris@42 846 T9N = W[58];
Chris@42 847 T9Q = W[59];
Chris@42 848 T9t = T9p * T9s;
Chris@42 849 T9J = T9u * T9s;
Chris@42 850 T9P = T9N * T9O;
Chris@42 851 T9Z = T9Q * T9O;
Chris@42 852 T9z = W[28];
Chris@42 853 T9U = FNMS(KP980785280, T9B, T9A);
Chris@42 854 T9C = FMA(KP980785280, T9B, T9A);
Chris@42 855 T9X = FMA(KP980785280, T9G, T9F);
Chris@42 856 T9H = FNMS(KP980785280, T9G, T9F);
Chris@42 857 T9T = W[60];
Chris@42 858 T9D = T9z * T9C;
Chris@42 859 }
Chris@42 860 T9y = FNMS(T9u, T9x, T9t);
Chris@42 861 T9L = T9z * T9H;
Chris@42 862 Ta1 = T9T * T9X;
Chris@42 863 T9V = T9T * T9U;
Chris@42 864 T9K = FMA(T9p, T9x, T9J);
Chris@42 865 T9E = W[29];
Chris@42 866 T9S = FNMS(T9Q, T9R, T9P);
Chris@42 867 Ta0 = FMA(T9N, T9R, T9Z);
Chris@42 868 T9W = W[61];
Chris@42 869 T9M = FNMS(T9E, T9C, T9L);
Chris@42 870 T9I = FMA(T9E, T9H, T9D);
Chris@42 871 }
Chris@42 872 {
Chris@42 873 E T7J, T8b, T7R, T7O, T87, T8a, T7N, T83, T89, T8j, T7T, T7W, T81, T8d, T7X;
Chris@42 874 E T85, T7Y;
Chris@42 875 {
Chris@42 876 E Ta2, T9Y, T88, T7M;
Chris@42 877 Ta2 = FNMS(T9W, T9U, Ta1);
Chris@42 878 T9Y = FMA(T9W, T9X, T9V);
Chris@42 879 Im[WS(rs, 7)] = T9M - T9K;
Chris@42 880 Ip[WS(rs, 7)] = T9K + T9M;
Chris@42 881 Rm[WS(rs, 7)] = T9y + T9I;
Chris@42 882 Rp[WS(rs, 7)] = T9y - T9I;
Chris@42 883 Im[WS(rs, 15)] = Ta2 - Ta0;
Chris@42 884 Ip[WS(rs, 15)] = Ta0 + Ta2;
Chris@42 885 Rm[WS(rs, 15)] = T9S + T9Y;
Chris@42 886 Rp[WS(rs, 15)] = T9S - T9Y;
Chris@42 887 T7J = W[18];
Chris@42 888 T8b = FNMS(KP923879532, T7Q, T7P);
Chris@42 889 T7R = FMA(KP923879532, T7Q, T7P);
Chris@42 890 T88 = FNMS(KP923879532, T7L, T7K);
Chris@42 891 T7M = FMA(KP923879532, T7L, T7K);
Chris@42 892 T7O = W[19];
Chris@42 893 T87 = W[50];
Chris@42 894 T8a = W[51];
Chris@42 895 T7N = T7J * T7M;
Chris@42 896 T83 = T7O * T7M;
Chris@42 897 T89 = T87 * T88;
Chris@42 898 T8j = T8a * T88;
Chris@42 899 T7T = W[20];
Chris@42 900 T8e = FNMS(KP831469612, T7V, T7U);
Chris@42 901 T7W = FMA(KP831469612, T7V, T7U);
Chris@42 902 T8h = FMA(KP831469612, T80, T7Z);
Chris@42 903 T81 = FNMS(KP831469612, T80, T7Z);
Chris@42 904 T8d = W[52];
Chris@42 905 T7X = T7T * T7W;
Chris@42 906 }
Chris@42 907 T7S = FNMS(T7O, T7R, T7N);
Chris@42 908 T85 = T7T * T81;
Chris@42 909 T8l = T8d * T8h;
Chris@42 910 T8f = T8d * T8e;
Chris@42 911 T84 = FMA(T7J, T7R, T83);
Chris@42 912 T7Y = W[21];
Chris@42 913 T8c = FNMS(T8a, T8b, T89);
Chris@42 914 T8k = FMA(T87, T8b, T8j);
Chris@42 915 T8g = W[53];
Chris@42 916 T86 = FNMS(T7Y, T7W, T85);
Chris@42 917 T82 = FMA(T7Y, T81, T7X);
Chris@42 918 }
Chris@42 919 }
Chris@42 920 }
Chris@42 921 }
Chris@42 922 }
Chris@42 923 T8m = FNMS(T8g, T8e, T8l);
Chris@42 924 T8i = FMA(T8g, T8h, T8f);
Chris@42 925 Im[WS(rs, 5)] = T86 - T84;
Chris@42 926 Ip[WS(rs, 5)] = T84 + T86;
Chris@42 927 Rm[WS(rs, 5)] = T7S + T82;
Chris@42 928 Rp[WS(rs, 5)] = T7S - T82;
Chris@42 929 Im[WS(rs, 13)] = T8m - T8k;
Chris@42 930 Ip[WS(rs, 13)] = T8k + T8m;
Chris@42 931 Rm[WS(rs, 13)] = T8c + T8i;
Chris@42 932 Rp[WS(rs, 13)] = T8c - T8i;
Chris@42 933 }
Chris@42 934 }
Chris@42 935 }
Chris@42 936
/*
 * Twiddle instruction list referenced by the codelet descriptor `desc`
 * (HAVE_FMA variant): a TW_FULL entry with arguments (1, 32) followed by a
 * TW_NEXT entry with arguments (1, 0).
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for
 * n = 32 and TW_NEXT ends the list -- confirm against the tw_instr definition.
 */
Chris@42 937 static const tw_instr twinstr[] = {
Chris@42 938 {TW_FULL, 1, 32},
Chris@42 939 {TW_NEXT, 1, 0}
Chris@42 940 };
Chris@42 941
/*
 * Descriptor for the HAVE_FMA build of this codelet: size 32, the name string,
 * the twiddle instruction list, &GENUS, and the operation counts
 * {300, 62, 198, 0}. The first three counts equal the additions /
 * multiplications / fused multiply-adds reported in the generator's header
 * comment for this variant.
 * NOTE(review): the meaning of the fourth count (0) is not visible here.
 */
Chris@42 942 static const hc2c_desc desc = { 32, "hc2cbdft_32", twinstr, &GENUS, {300, 62, 198, 0} };
Chris@42 943
/*
 * Registration entry point (HAVE_FMA variant): hands the hc2cbdft_32 function
 * and its descriptor to planner `p` through X(khc2c_register), tagged with
 * HC2C_VIA_DFT.
 */
Chris@42 944 void X(codelet_hc2cbdft_32) (planner *p) {
Chris@42 945 X(khc2c_register) (p, hc2cbdft_32, &desc, HC2C_VIA_DFT);
Chris@42 946 }
Chris@42 947 #else /* HAVE_FMA */
Chris@42 948
Chris@42 949 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft_32 -include hc2cb.h */
Chris@42 950
Chris@42 951 /*
Chris@42 952 * This function contains 498 FP additions, 208 FP multiplications,
Chris@42 953 * (or, 404 additions, 114 multiplications, 94 fused multiply/add),
Chris@42 954 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@42 955 */
Chris@42 956 #include "hc2cb.h"
Chris@42 957
/*
 * hc2cbdft_32 (non-FMA variant), machine-generated by genfft.
 *
 * For every m in [mb, me) the routine reads 16 values from each of Rp, Ip,
 * Rm and Im (element k is addressed as X[WS(rs, k)], k = 0..15), combines
 * them with the seven constants declared below and with the twiddle entries
 * W[0]..W[61], and stores 16 values back into each of the four arrays in
 * place.
 *
 *   Rp, Ip  in/out arrays; advanced by +ms after each iteration.
 *   Rm, Im  in/out arrays; advanced by -ms after each iteration.
 *   W       read-only twiddle table; offset by (mb - 1) * 62 on entry to the
 *           loop and advanced by 62 entries per iteration.
 *   rs      stride argument passed to WS() for element addressing.
 *   mb, me  half-open iteration range.
 *   ms      per-iteration pointer step.
 *
 * All loads from Rp/Ip/Rm/Im happen in the four input stages, before the
 * first store in the output stages, which is what makes the in-place update
 * safe within one iteration.
 *
 * NOTE(review): the function name and the generator flags (-sign 1, -dif)
 * suggest a backward, decimation-in-frequency half-complex-to-complex step;
 * confirm against the genfft / hc2cb.h documentation.
 */
Chris@42 958 static void hc2cbdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 959 {
Chris@42 960 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 961 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 962 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 963 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 964 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 965 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 966 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 967 {
Chris@42 968 INT m;
/* One pass per m: Rp/Ip walk forward and Rm/Im walk backward by ms, while W
 * moves on by the 62 twiddle entries consumed in each pass. */
Chris@42 969 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 970 E Tf, T4a, T6h, T7Z, T6P, T8e, T1j, T4v, T2R, T4L, T5C, T7E, T6a, T7U, T3n;
Chris@42 971 E T4q, TZ, T38, T2p, T4B, T7M, T7R, T2y, T4C, T5Y, T63, T6C, T86, T4i, T4n;
Chris@42 972 E T6z, T85, TK, T31, T1Y, T4y, T7J, T7Q, T27, T4z, T5R, T62, T6v, T83, T4f;
Chris@42 973 E T4m, T6s, T82, Tu, T4p, T6o, T8f, T6M, T80, T1G, T4K, T2I, T4w, T5J, T7T;
Chris@42 974 E T67, T7F, T3g, T4b;
/* Input stage 1: reads Rp/Ip at indices 0, 4, 8, 12 and Rm/Im at 3, 7, 11, 15. */
Chris@42 975 {
Chris@42 976 E T3, T2M, T16, T3k, T6, T13, T2P, T3l, Td, T3i, T1h, T2K, Ta, T3h, T1c;
Chris@42 977 E T2J;
Chris@42 978 {
Chris@42 979 E T1, T2, T2N, T2O;
Chris@42 980 T1 = Rp[0];
Chris@42 981 T2 = Rm[WS(rs, 15)];
Chris@42 982 T3 = T1 + T2;
Chris@42 983 T2M = T1 - T2;
Chris@42 984 {
Chris@42 985 E T14, T15, T4, T5;
Chris@42 986 T14 = Ip[0];
Chris@42 987 T15 = Im[WS(rs, 15)];
Chris@42 988 T16 = T14 + T15;
Chris@42 989 T3k = T14 - T15;
Chris@42 990 T4 = Rp[WS(rs, 8)];
Chris@42 991 T5 = Rm[WS(rs, 7)];
Chris@42 992 T6 = T4 + T5;
Chris@42 993 T13 = T4 - T5;
Chris@42 994 }
Chris@42 995 T2N = Ip[WS(rs, 8)];
Chris@42 996 T2O = Im[WS(rs, 7)];
Chris@42 997 T2P = T2N + T2O;
Chris@42 998 T3l = T2N - T2O;
Chris@42 999 {
Chris@42 1000 E Tb, Tc, T1d, T1e, T1f, T1g;
Chris@42 1001 Tb = Rm[WS(rs, 3)];
Chris@42 1002 Tc = Rp[WS(rs, 12)];
Chris@42 1003 T1d = Tb - Tc;
Chris@42 1004 T1e = Im[WS(rs, 3)];
Chris@42 1005 T1f = Ip[WS(rs, 12)];
Chris@42 1006 T1g = T1e + T1f;
Chris@42 1007 Td = Tb + Tc;
Chris@42 1008 T3i = T1f - T1e;
Chris@42 1009 T1h = T1d + T1g;
Chris@42 1010 T2K = T1d - T1g;
Chris@42 1011 }
Chris@42 1012 {
Chris@42 1013 E T8, T9, T18, T19, T1a, T1b;
Chris@42 1014 T8 = Rp[WS(rs, 4)];
Chris@42 1015 T9 = Rm[WS(rs, 11)];
Chris@42 1016 T18 = T8 - T9;
Chris@42 1017 T19 = Ip[WS(rs, 4)];
Chris@42 1018 T1a = Im[WS(rs, 11)];
Chris@42 1019 T1b = T19 + T1a;
Chris@42 1020 Ta = T8 + T9;
Chris@42 1021 T3h = T19 - T1a;
Chris@42 1022 T1c = T18 + T1b;
Chris@42 1023 T2J = T18 - T1b;
Chris@42 1024 }
Chris@42 1025 }
Chris@42 1026 {
Chris@42 1027 E T7, Te, T6f, T6g;
Chris@42 1028 T7 = T3 + T6;
Chris@42 1029 Te = Ta + Td;
Chris@42 1030 Tf = T7 + Te;
Chris@42 1031 T4a = T7 - Te;
Chris@42 1032 T6f = T16 - T13;
Chris@42 1033 T6g = KP707106781 * (T2J - T2K);
Chris@42 1034 T6h = T6f + T6g;
Chris@42 1035 T7Z = T6f - T6g;
Chris@42 1036 }
Chris@42 1037 {
Chris@42 1038 E T6N, T6O, T17, T1i;
Chris@42 1039 T6N = T2M + T2P;
Chris@42 1040 T6O = KP707106781 * (T1c + T1h);
Chris@42 1041 T6P = T6N - T6O;
Chris@42 1042 T8e = T6O + T6N;
Chris@42 1043 T17 = T13 + T16;
Chris@42 1044 T1i = KP707106781 * (T1c - T1h);
Chris@42 1045 T1j = T17 + T1i;
Chris@42 1046 T4v = T17 - T1i;
Chris@42 1047 }
Chris@42 1048 {
Chris@42 1049 E T2L, T2Q, T5A, T5B;
Chris@42 1050 T2L = KP707106781 * (T2J + T2K);
Chris@42 1051 T2Q = T2M - T2P;
Chris@42 1052 T2R = T2L + T2Q;
Chris@42 1053 T4L = T2Q - T2L;
Chris@42 1054 T5A = T3 - T6;
Chris@42 1055 T5B = T3i - T3h;
Chris@42 1056 T5C = T5A + T5B;
Chris@42 1057 T7E = T5A - T5B;
Chris@42 1058 }
Chris@42 1059 {
Chris@42 1060 E T68, T69, T3j, T3m;
Chris@42 1061 T68 = Ta - Td;
Chris@42 1062 T69 = T3k - T3l;
Chris@42 1063 T6a = T68 + T69;
Chris@42 1064 T7U = T69 - T68;
Chris@42 1065 T3j = T3h + T3i;
Chris@42 1066 T3m = T3k + T3l;
Chris@42 1067 T3n = T3j + T3m;
Chris@42 1068 T4q = T3m - T3j;
Chris@42 1069 }
Chris@42 1070 }
/* Input stage 2: reads Rp/Ip at indices 3, 7, 11, 15 and Rm/Im at 0, 4, 8, 12. */
Chris@42 1071 {
Chris@42 1072 E TR, T5S, T29, T2t, T2c, T5W, T2w, T37, TY, T5T, T5V, T2i, T2n, T2r, T34;
Chris@42 1073 E T2q, T6A, T6B;
Chris@42 1074 {
Chris@42 1075 E TL, TM, TN, TO, TP, TQ;
Chris@42 1076 TL = Rm[0];
Chris@42 1077 TM = Rp[WS(rs, 15)];
Chris@42 1078 TN = TL + TM;
Chris@42 1079 TO = Rp[WS(rs, 7)];
Chris@42 1080 TP = Rm[WS(rs, 8)];
Chris@42 1081 TQ = TO + TP;
Chris@42 1082 TR = TN + TQ;
Chris@42 1083 T5S = TN - TQ;
Chris@42 1084 T29 = TO - TP;
Chris@42 1085 T2t = TL - TM;
Chris@42 1086 }
Chris@42 1087 {
Chris@42 1088 E T2a, T2b, T35, T2u, T2v, T36;
Chris@42 1089 T2a = Im[0];
Chris@42 1090 T2b = Ip[WS(rs, 15)];
Chris@42 1091 T35 = T2b - T2a;
Chris@42 1092 T2u = Ip[WS(rs, 7)];
Chris@42 1093 T2v = Im[WS(rs, 8)];
Chris@42 1094 T36 = T2u - T2v;
Chris@42 1095 T2c = T2a + T2b;
Chris@42 1096 T5W = T35 - T36;
Chris@42 1097 T2w = T2u + T2v;
Chris@42 1098 T37 = T35 + T36;
Chris@42 1099 }
Chris@42 1100 {
Chris@42 1101 E TU, T2e, T2h, T32, TX, T2j, T2m, T33;
Chris@42 1102 {
Chris@42 1103 E TS, TT, T2f, T2g;
Chris@42 1104 TS = Rp[WS(rs, 3)];
Chris@42 1105 TT = Rm[WS(rs, 12)];
Chris@42 1106 TU = TS + TT;
Chris@42 1107 T2e = TS - TT;
Chris@42 1108 T2f = Ip[WS(rs, 3)];
Chris@42 1109 T2g = Im[WS(rs, 12)];
Chris@42 1110 T2h = T2f + T2g;
Chris@42 1111 T32 = T2f - T2g;
Chris@42 1112 }
Chris@42 1113 {
Chris@42 1114 E TV, TW, T2k, T2l;
Chris@42 1115 TV = Rm[WS(rs, 4)];
Chris@42 1116 TW = Rp[WS(rs, 11)];
Chris@42 1117 TX = TV + TW;
Chris@42 1118 T2j = TV - TW;
Chris@42 1119 T2k = Im[WS(rs, 4)];
Chris@42 1120 T2l = Ip[WS(rs, 11)];
Chris@42 1121 T2m = T2k + T2l;
Chris@42 1122 T33 = T2l - T2k;
Chris@42 1123 }
Chris@42 1124 TY = TU + TX;
Chris@42 1125 T5T = T33 - T32;
Chris@42 1126 T5V = TU - TX;
Chris@42 1127 T2i = T2e + T2h;
Chris@42 1128 T2n = T2j + T2m;
Chris@42 1129 T2r = T2j - T2m;
Chris@42 1130 T34 = T32 + T33;
Chris@42 1131 T2q = T2e - T2h;
Chris@42 1132 }
Chris@42 1133 TZ = TR + TY;
Chris@42 1134 T38 = T34 + T37;
Chris@42 1135 {
Chris@42 1136 E T2d, T2o, T7K, T7L;
Chris@42 1137 T2d = T29 - T2c;
Chris@42 1138 T2o = KP707106781 * (T2i - T2n);
Chris@42 1139 T2p = T2d + T2o;
Chris@42 1140 T4B = T2d - T2o;
Chris@42 1141 T7K = T5S - T5T;
Chris@42 1142 T7L = T5W - T5V;
Chris@42 1143 T7M = FMA(KP382683432, T7K, KP923879532 * T7L);
Chris@42 1144 T7R = FNMS(KP923879532, T7K, KP382683432 * T7L);
Chris@42 1145 }
Chris@42 1146 {
Chris@42 1147 E T2s, T2x, T5U, T5X;
Chris@42 1148 T2s = KP707106781 * (T2q + T2r);
Chris@42 1149 T2x = T2t - T2w;
Chris@42 1150 T2y = T2s + T2x;
Chris@42 1151 T4C = T2x - T2s;
Chris@42 1152 T5U = T5S + T5T;
Chris@42 1153 T5X = T5V + T5W;
Chris@42 1154 T5Y = FMA(KP923879532, T5U, KP382683432 * T5X);
Chris@42 1155 T63 = FNMS(KP382683432, T5U, KP923879532 * T5X);
Chris@42 1156 }
Chris@42 1157 T6A = T2t + T2w;
Chris@42 1158 T6B = KP707106781 * (T2i + T2n);
Chris@42 1159 T6C = T6A - T6B;
Chris@42 1160 T86 = T6B + T6A;
Chris@42 1161 {
Chris@42 1162 E T4g, T4h, T6x, T6y;
Chris@42 1163 T4g = TR - TY;
Chris@42 1164 T4h = T37 - T34;
Chris@42 1165 T4i = T4g + T4h;
Chris@42 1166 T4n = T4h - T4g;
Chris@42 1167 T6x = KP707106781 * (T2q - T2r);
Chris@42 1168 T6y = T29 + T2c;
Chris@42 1169 T6z = T6x - T6y;
Chris@42 1170 T85 = T6y + T6x;
Chris@42 1171 }
Chris@42 1172 }
/* Input stage 3: reads Rp/Ip at indices 1, 5, 9, 13 and Rm/Im at 2, 6, 10, 14. */
Chris@42 1173 {
Chris@42 1174 E TC, T5L, T1I, T22, T1L, T5P, T25, T30, TJ, T5M, T5O, T1R, T1W, T20, T2X;
Chris@42 1175 E T1Z, T6t, T6u;
Chris@42 1176 {
Chris@42 1177 E Tw, Tx, Ty, Tz, TA, TB;
Chris@42 1178 Tw = Rp[WS(rs, 1)];
Chris@42 1179 Tx = Rm[WS(rs, 14)];
Chris@42 1180 Ty = Tw + Tx;
Chris@42 1181 Tz = Rp[WS(rs, 9)];
Chris@42 1182 TA = Rm[WS(rs, 6)];
Chris@42 1183 TB = Tz + TA;
Chris@42 1184 TC = Ty + TB;
Chris@42 1185 T5L = Ty - TB;
Chris@42 1186 T1I = Tz - TA;
Chris@42 1187 T22 = Tw - Tx;
Chris@42 1188 }
Chris@42 1189 {
Chris@42 1190 E T1J, T1K, T2Y, T23, T24, T2Z;
Chris@42 1191 T1J = Ip[WS(rs, 1)];
Chris@42 1192 T1K = Im[WS(rs, 14)];
Chris@42 1193 T2Y = T1J - T1K;
Chris@42 1194 T23 = Ip[WS(rs, 9)];
Chris@42 1195 T24 = Im[WS(rs, 6)];
Chris@42 1196 T2Z = T23 - T24;
Chris@42 1197 T1L = T1J + T1K;
Chris@42 1198 T5P = T2Y - T2Z;
Chris@42 1199 T25 = T23 + T24;
Chris@42 1200 T30 = T2Y + T2Z;
Chris@42 1201 }
Chris@42 1202 {
Chris@42 1203 E TF, T1N, T1Q, T2V, TI, T1S, T1V, T2W;
Chris@42 1204 {
Chris@42 1205 E TD, TE, T1O, T1P;
Chris@42 1206 TD = Rp[WS(rs, 5)];
Chris@42 1207 TE = Rm[WS(rs, 10)];
Chris@42 1208 TF = TD + TE;
Chris@42 1209 T1N = TD - TE;
Chris@42 1210 T1O = Ip[WS(rs, 5)];
Chris@42 1211 T1P = Im[WS(rs, 10)];
Chris@42 1212 T1Q = T1O + T1P;
Chris@42 1213 T2V = T1O - T1P;
Chris@42 1214 }
Chris@42 1215 {
Chris@42 1216 E TG, TH, T1T, T1U;
Chris@42 1217 TG = Rm[WS(rs, 2)];
Chris@42 1218 TH = Rp[WS(rs, 13)];
Chris@42 1219 TI = TG + TH;
Chris@42 1220 T1S = TG - TH;
Chris@42 1221 T1T = Im[WS(rs, 2)];
Chris@42 1222 T1U = Ip[WS(rs, 13)];
Chris@42 1223 T1V = T1T + T1U;
Chris@42 1224 T2W = T1U - T1T;
Chris@42 1225 }
Chris@42 1226 TJ = TF + TI;
Chris@42 1227 T5M = T2W - T2V;
Chris@42 1228 T5O = TF - TI;
Chris@42 1229 T1R = T1N + T1Q;
Chris@42 1230 T1W = T1S + T1V;
Chris@42 1231 T20 = T1S - T1V;
Chris@42 1232 T2X = T2V + T2W;
Chris@42 1233 T1Z = T1N - T1Q;
Chris@42 1234 }
Chris@42 1235 TK = TC + TJ;
Chris@42 1236 T31 = T2X + T30;
Chris@42 1237 {
Chris@42 1238 E T1M, T1X, T7H, T7I;
Chris@42 1239 T1M = T1I + T1L;
Chris@42 1240 T1X = KP707106781 * (T1R - T1W);
Chris@42 1241 T1Y = T1M + T1X;
Chris@42 1242 T4y = T1M - T1X;
Chris@42 1243 T7H = T5L - T5M;
Chris@42 1244 T7I = T5P - T5O;
Chris@42 1245 T7J = FNMS(KP923879532, T7I, KP382683432 * T7H);
Chris@42 1246 T7Q = FMA(KP923879532, T7H, KP382683432 * T7I);
Chris@42 1247 }
Chris@42 1248 {
Chris@42 1249 E T21, T26, T5N, T5Q;
Chris@42 1250 T21 = KP707106781 * (T1Z + T20);
Chris@42 1251 T26 = T22 - T25;
Chris@42 1252 T27 = T21 + T26;
Chris@42 1253 T4z = T26 - T21;
Chris@42 1254 T5N = T5L + T5M;
Chris@42 1255 T5Q = T5O + T5P;
Chris@42 1256 T5R = FNMS(KP382683432, T5Q, KP923879532 * T5N);
Chris@42 1257 T62 = FMA(KP382683432, T5N, KP923879532 * T5Q);
Chris@42 1258 }
Chris@42 1259 T6t = T22 + T25;
Chris@42 1260 T6u = KP707106781 * (T1R + T1W);
Chris@42 1261 T6v = T6t - T6u;
Chris@42 1262 T83 = T6u + T6t;
Chris@42 1263 {
Chris@42 1264 E T4d, T4e, T6q, T6r;
Chris@42 1265 T4d = TC - TJ;
Chris@42 1266 T4e = T30 - T2X;
Chris@42 1267 T4f = T4d - T4e;
Chris@42 1268 T4m = T4d + T4e;
Chris@42 1269 T6q = T1L - T1I;
Chris@42 1270 T6r = KP707106781 * (T1Z - T20);
Chris@42 1271 T6s = T6q + T6r;
Chris@42 1272 T82 = T6q - T6r;
Chris@42 1273 }
Chris@42 1274 }
/* Input stage 4: reads Rp/Ip at indices 2, 6, 10, 14 and Rm/Im at 1, 5, 9, 13. */
Chris@42 1275 {
Chris@42 1276 E Ti, T3a, Tl, T3b, T1o, T1t, T6j, T6i, T5E, T5D, Tp, T3d, Ts, T3e, T1z;
Chris@42 1277 E T1E, T6m, T6l, T5H, T5G;
Chris@42 1278 {
Chris@42 1279 E T1p, T1n, T1k, T1s;
Chris@42 1280 {
Chris@42 1281 E Tg, Th, T1l, T1m;
Chris@42 1282 Tg = Rp[WS(rs, 2)];
Chris@42 1283 Th = Rm[WS(rs, 13)];
Chris@42 1284 Ti = Tg + Th;
Chris@42 1285 T1p = Tg - Th;
Chris@42 1286 T1l = Ip[WS(rs, 2)];
Chris@42 1287 T1m = Im[WS(rs, 13)];
Chris@42 1288 T1n = T1l + T1m;
Chris@42 1289 T3a = T1l - T1m;
Chris@42 1290 }
Chris@42 1291 {
Chris@42 1292 E Tj, Tk, T1q, T1r;
Chris@42 1293 Tj = Rp[WS(rs, 10)];
Chris@42 1294 Tk = Rm[WS(rs, 5)];
Chris@42 1295 Tl = Tj + Tk;
Chris@42 1296 T1k = Tj - Tk;
Chris@42 1297 T1q = Ip[WS(rs, 10)];
Chris@42 1298 T1r = Im[WS(rs, 5)];
Chris@42 1299 T1s = T1q + T1r;
Chris@42 1300 T3b = T1q - T1r;
Chris@42 1301 }
Chris@42 1302 T1o = T1k + T1n;
Chris@42 1303 T1t = T1p - T1s;
Chris@42 1304 T6j = T1p + T1s;
Chris@42 1305 T6i = T1n - T1k;
Chris@42 1306 T5E = T3a - T3b;
Chris@42 1307 T5D = Ti - Tl;
Chris@42 1308 }
Chris@42 1309 {
Chris@42 1310 E T1A, T1y, T1v, T1D;
Chris@42 1311 {
Chris@42 1312 E Tn, To, T1w, T1x;
Chris@42 1313 Tn = Rm[WS(rs, 1)];
Chris@42 1314 To = Rp[WS(rs, 14)];
Chris@42 1315 Tp = Tn + To;
Chris@42 1316 T1A = Tn - To;
Chris@42 1317 T1w = Im[WS(rs, 1)];
Chris@42 1318 T1x = Ip[WS(rs, 14)];
Chris@42 1319 T1y = T1w + T1x;
Chris@42 1320 T3d = T1x - T1w;
Chris@42 1321 }
Chris@42 1322 {
Chris@42 1323 E Tq, Tr, T1B, T1C;
Chris@42 1324 Tq = Rp[WS(rs, 6)];
Chris@42 1325 Tr = Rm[WS(rs, 9)];
Chris@42 1326 Ts = Tq + Tr;
Chris@42 1327 T1v = Tq - Tr;
Chris@42 1328 T1B = Ip[WS(rs, 6)];
Chris@42 1329 T1C = Im[WS(rs, 9)];
Chris@42 1330 T1D = T1B + T1C;
Chris@42 1331 T3e = T1B - T1C;
Chris@42 1332 }
Chris@42 1333 T1z = T1v - T1y;
Chris@42 1334 T1E = T1A - T1D;
Chris@42 1335 T6m = T1A + T1D;
Chris@42 1336 T6l = T1v + T1y;
Chris@42 1337 T5H = T3d - T3e;
Chris@42 1338 T5G = Tp - Ts;
Chris@42 1339 }
Chris@42 1340 {
Chris@42 1341 E Tm, Tt, T6k, T6n;
Chris@42 1342 Tm = Ti + Tl;
Chris@42 1343 Tt = Tp + Ts;
Chris@42 1344 Tu = Tm + Tt;
Chris@42 1345 T4p = Tm - Tt;
Chris@42 1346 T6k = FMA(KP382683432, T6i, KP923879532 * T6j);
Chris@42 1347 T6n = FMA(KP382683432, T6l, KP923879532 * T6m);
Chris@42 1348 T6o = T6k - T6n;
Chris@42 1349 T8f = T6k + T6n;
Chris@42 1350 }
Chris@42 1351 {
Chris@42 1352 E T6K, T6L, T1u, T1F;
Chris@42 1353 T6K = FNMS(KP923879532, T6i, KP382683432 * T6j);
Chris@42 1354 T6L = FNMS(KP923879532, T6l, KP382683432 * T6m);
Chris@42 1355 T6M = T6K + T6L;
Chris@42 1356 T80 = T6K - T6L;
Chris@42 1357 T1u = FMA(KP923879532, T1o, KP382683432 * T1t);
Chris@42 1358 T1F = FNMS(KP382683432, T1E, KP923879532 * T1z);
Chris@42 1359 T1G = T1u + T1F;
Chris@42 1360 T4K = T1F - T1u;
Chris@42 1361 }
Chris@42 1362 {
Chris@42 1363 E T2G, T2H, T5F, T5I;
Chris@42 1364 T2G = FNMS(KP382683432, T1o, KP923879532 * T1t);
Chris@42 1365 T2H = FMA(KP382683432, T1z, KP923879532 * T1E);
Chris@42 1366 T2I = T2G + T2H;
Chris@42 1367 T4w = T2G - T2H;
Chris@42 1368 T5F = T5D - T5E;
Chris@42 1369 T5I = T5G + T5H;
Chris@42 1370 T5J = KP707106781 * (T5F + T5I);
Chris@42 1371 T7T = KP707106781 * (T5F - T5I);
Chris@42 1372 }
Chris@42 1373 {
Chris@42 1374 E T65, T66, T3c, T3f;
Chris@42 1375 T65 = T5D + T5E;
Chris@42 1376 T66 = T5H - T5G;
Chris@42 1377 T67 = KP707106781 * (T65 + T66);
Chris@42 1378 T7F = KP707106781 * (T66 - T65);
Chris@42 1379 T3c = T3a + T3b;
Chris@42 1380 T3f = T3d + T3e;
Chris@42 1381 T3g = T3c + T3f;
Chris@42 1382 T4b = T3f - T3c;
Chris@42 1383 }
Chris@42 1384 }
/* Output stage A: stores indices 0, 12, 8 and 4. Index 0 uses only W[0], W[1];
 * every other index k in this and the later stages uses W[4k-2] .. W[4k+1]. */
Chris@42 1385 {
Chris@42 1386 E T11, T3s, T3p, T3u, T3K, T40, T3G, T3Y, T2T, T43, T3z, T3P, T2B, T45, T3x;
Chris@42 1387 E T3T;
Chris@42 1388 {
Chris@42 1389 E Tv, T10, T3E, T3F;
Chris@42 1390 Tv = Tf + Tu;
Chris@42 1391 T10 = TK + TZ;
Chris@42 1392 T11 = Tv + T10;
Chris@42 1393 T3s = Tv - T10;
Chris@42 1394 {
Chris@42 1395 E T39, T3o, T3I, T3J;
Chris@42 1396 T39 = T31 + T38;
Chris@42 1397 T3o = T3g + T3n;
Chris@42 1398 T3p = T39 + T3o;
Chris@42 1399 T3u = T3o - T39;
Chris@42 1400 T3I = TK - TZ;
Chris@42 1401 T3J = T3n - T3g;
Chris@42 1402 T3K = T3I + T3J;
Chris@42 1403 T40 = T3J - T3I;
Chris@42 1404 }
Chris@42 1405 T3E = Tf - Tu;
Chris@42 1406 T3F = T38 - T31;
Chris@42 1407 T3G = T3E + T3F;
Chris@42 1408 T3Y = T3E - T3F;
Chris@42 1409 {
Chris@42 1410 E T2S, T3N, T2F, T3O, T2D, T2E;
Chris@42 1411 T2S = T2I + T2R;
Chris@42 1412 T3N = T1j - T1G;
Chris@42 1413 T2D = FNMS(KP195090322, T1Y, KP980785280 * T27);
Chris@42 1414 T2E = FMA(KP195090322, T2p, KP980785280 * T2y);
Chris@42 1415 T2F = T2D + T2E;
Chris@42 1416 T3O = T2D - T2E;
Chris@42 1417 T2T = T2F + T2S;
Chris@42 1418 T43 = T3N - T3O;
Chris@42 1419 T3z = T2S - T2F;
Chris@42 1420 T3P = T3N + T3O;
Chris@42 1421 }
Chris@42 1422 {
Chris@42 1423 E T1H, T3S, T2A, T3R, T28, T2z;
Chris@42 1424 T1H = T1j + T1G;
Chris@42 1425 T3S = T2R - T2I;
Chris@42 1426 T28 = FMA(KP980785280, T1Y, KP195090322 * T27);
Chris@42 1427 T2z = FNMS(KP195090322, T2y, KP980785280 * T2p);
Chris@42 1428 T2A = T28 + T2z;
Chris@42 1429 T3R = T2z - T28;
Chris@42 1430 T2B = T1H + T2A;
Chris@42 1431 T45 = T3S - T3R;
Chris@42 1432 T3x = T1H - T2A;
Chris@42 1433 T3T = T3R + T3S;
Chris@42 1434 }
Chris@42 1435 }
Chris@42 1436 {
Chris@42 1437 E T2U, T3q, T12, T2C;
Chris@42 1438 T12 = W[0];
Chris@42 1439 T2C = W[1];
Chris@42 1440 T2U = FMA(T12, T2B, T2C * T2T);
Chris@42 1441 T3q = FNMS(T2C, T2B, T12 * T2T);
Chris@42 1442 Rp[0] = T11 - T2U;
Chris@42 1443 Ip[0] = T3p + T3q;
Chris@42 1444 Rm[0] = T11 + T2U;
Chris@42 1445 Im[0] = T3q - T3p;
Chris@42 1446 }
Chris@42 1447 {
Chris@42 1448 E T41, T47, T46, T48;
Chris@42 1449 {
Chris@42 1450 E T3X, T3Z, T42, T44;
Chris@42 1451 T3X = W[46];
Chris@42 1452 T3Z = W[47];
Chris@42 1453 T41 = FNMS(T3Z, T40, T3X * T3Y);
Chris@42 1454 T47 = FMA(T3Z, T3Y, T3X * T40);
Chris@42 1455 T42 = W[48];
Chris@42 1456 T44 = W[49];
Chris@42 1457 T46 = FMA(T42, T43, T44 * T45);
Chris@42 1458 T48 = FNMS(T44, T43, T42 * T45);
Chris@42 1459 }
Chris@42 1460 Rp[WS(rs, 12)] = T41 - T46;
Chris@42 1461 Ip[WS(rs, 12)] = T47 + T48;
Chris@42 1462 Rm[WS(rs, 12)] = T41 + T46;
Chris@42 1463 Im[WS(rs, 12)] = T48 - T47;
Chris@42 1464 }
Chris@42 1465 {
Chris@42 1466 E T3v, T3B, T3A, T3C;
Chris@42 1467 {
Chris@42 1468 E T3r, T3t, T3w, T3y;
Chris@42 1469 T3r = W[30];
Chris@42 1470 T3t = W[31];
Chris@42 1471 T3v = FNMS(T3t, T3u, T3r * T3s);
Chris@42 1472 T3B = FMA(T3t, T3s, T3r * T3u);
Chris@42 1473 T3w = W[32];
Chris@42 1474 T3y = W[33];
Chris@42 1475 T3A = FMA(T3w, T3x, T3y * T3z);
Chris@42 1476 T3C = FNMS(T3y, T3x, T3w * T3z);
Chris@42 1477 }
Chris@42 1478 Rp[WS(rs, 8)] = T3v - T3A;
Chris@42 1479 Ip[WS(rs, 8)] = T3B + T3C;
Chris@42 1480 Rm[WS(rs, 8)] = T3v + T3A;
Chris@42 1481 Im[WS(rs, 8)] = T3C - T3B;
Chris@42 1482 }
Chris@42 1483 {
Chris@42 1484 E T3L, T3V, T3U, T3W;
Chris@42 1485 {
Chris@42 1486 E T3D, T3H, T3M, T3Q;
Chris@42 1487 T3D = W[14];
Chris@42 1488 T3H = W[15];
Chris@42 1489 T3L = FNMS(T3H, T3K, T3D * T3G);
Chris@42 1490 T3V = FMA(T3H, T3G, T3D * T3K);
Chris@42 1491 T3M = W[16];
Chris@42 1492 T3Q = W[17];
Chris@42 1493 T3U = FMA(T3M, T3P, T3Q * T3T);
Chris@42 1494 T3W = FNMS(T3Q, T3P, T3M * T3T);
Chris@42 1495 }
Chris@42 1496 Rp[WS(rs, 4)] = T3L - T3U;
Chris@42 1497 Ip[WS(rs, 4)] = T3V + T3W;
Chris@42 1498 Rm[WS(rs, 4)] = T3L + T3U;
Chris@42 1499 Im[WS(rs, 4)] = T3W - T3V;
Chris@42 1500 }
Chris@42 1501 }
/* Output stage B: stores indices 3, 15, 11 and 7. */
Chris@42 1502 {
Chris@42 1503 E T7O, T8m, T7W, T8o, T8E, T8U, T8A, T8S, T8h, T8X, T8t, T8J, T89, T8Z, T8r;
Chris@42 1504 E T8N;
Chris@42 1505 {
Chris@42 1506 E T7G, T7N, T8y, T8z;
Chris@42 1507 T7G = T7E + T7F;
Chris@42 1508 T7N = T7J + T7M;
Chris@42 1509 T7O = T7G + T7N;
Chris@42 1510 T8m = T7G - T7N;
Chris@42 1511 {
Chris@42 1512 E T7S, T7V, T8C, T8D;
Chris@42 1513 T7S = T7Q + T7R;
Chris@42 1514 T7V = T7T + T7U;
Chris@42 1515 T7W = T7S + T7V;
Chris@42 1516 T8o = T7V - T7S;
Chris@42 1517 T8C = T7J - T7M;
Chris@42 1518 T8D = T7U - T7T;
Chris@42 1519 T8E = T8C + T8D;
Chris@42 1520 T8U = T8D - T8C;
Chris@42 1521 }
Chris@42 1522 T8y = T7E - T7F;
Chris@42 1523 T8z = T7R - T7Q;
Chris@42 1524 T8A = T8y + T8z;
Chris@42 1525 T8S = T8y - T8z;
Chris@42 1526 {
Chris@42 1527 E T8g, T8H, T8d, T8I, T8b, T8c;
Chris@42 1528 T8g = T8e - T8f;
Chris@42 1529 T8H = T7Z - T80;
Chris@42 1530 T8b = FNMS(KP980785280, T82, KP195090322 * T83);
Chris@42 1531 T8c = FNMS(KP980785280, T85, KP195090322 * T86);
Chris@42 1532 T8d = T8b + T8c;
Chris@42 1533 T8I = T8b - T8c;
Chris@42 1534 T8h = T8d + T8g;
Chris@42 1535 T8X = T8H - T8I;
Chris@42 1536 T8t = T8g - T8d;
Chris@42 1537 T8J = T8H + T8I;
Chris@42 1538 }
Chris@42 1539 {
Chris@42 1540 E T81, T8L, T88, T8M, T84, T87;
Chris@42 1541 T81 = T7Z + T80;
Chris@42 1542 T8L = T8f + T8e;
Chris@42 1543 T84 = FMA(KP195090322, T82, KP980785280 * T83);
Chris@42 1544 T87 = FMA(KP195090322, T85, KP980785280 * T86);
Chris@42 1545 T88 = T84 - T87;
Chris@42 1546 T8M = T84 + T87;
Chris@42 1547 T89 = T81 + T88;
Chris@42 1548 T8Z = T8M + T8L;
Chris@42 1549 T8r = T81 - T88;
Chris@42 1550 T8N = T8L - T8M;
Chris@42 1551 }
Chris@42 1552 }
Chris@42 1553 {
Chris@42 1554 E T7X, T8j, T8i, T8k;
Chris@42 1555 {
Chris@42 1556 E T7D, T7P, T7Y, T8a;
Chris@42 1557 T7D = W[10];
Chris@42 1558 T7P = W[11];
Chris@42 1559 T7X = FNMS(T7P, T7W, T7D * T7O);
Chris@42 1560 T8j = FMA(T7P, T7O, T7D * T7W);
Chris@42 1561 T7Y = W[12];
Chris@42 1562 T8a = W[13];
Chris@42 1563 T8i = FMA(T7Y, T89, T8a * T8h);
Chris@42 1564 T8k = FNMS(T8a, T89, T7Y * T8h);
Chris@42 1565 }
Chris@42 1566 Rp[WS(rs, 3)] = T7X - T8i;
Chris@42 1567 Ip[WS(rs, 3)] = T8j + T8k;
Chris@42 1568 Rm[WS(rs, 3)] = T7X + T8i;
Chris@42 1569 Im[WS(rs, 3)] = T8k - T8j;
Chris@42 1570 }
Chris@42 1571 {
Chris@42 1572 E T8V, T91, T90, T92;
Chris@42 1573 {
Chris@42 1574 E T8R, T8T, T8W, T8Y;
Chris@42 1575 T8R = W[58];
Chris@42 1576 T8T = W[59];
Chris@42 1577 T8V = FNMS(T8T, T8U, T8R * T8S);
Chris@42 1578 T91 = FMA(T8T, T8S, T8R * T8U);
Chris@42 1579 T8W = W[60];
Chris@42 1580 T8Y = W[61];
Chris@42 1581 T90 = FMA(T8W, T8X, T8Y * T8Z);
Chris@42 1582 T92 = FNMS(T8Y, T8X, T8W * T8Z);
Chris@42 1583 }
Chris@42 1584 Rp[WS(rs, 15)] = T8V - T90;
Chris@42 1585 Ip[WS(rs, 15)] = T91 + T92;
Chris@42 1586 Rm[WS(rs, 15)] = T8V + T90;
Chris@42 1587 Im[WS(rs, 15)] = T92 - T91;
Chris@42 1588 }
Chris@42 1589 {
Chris@42 1590 E T8p, T8v, T8u, T8w;
Chris@42 1591 {
Chris@42 1592 E T8l, T8n, T8q, T8s;
Chris@42 1593 T8l = W[42];
Chris@42 1594 T8n = W[43];
Chris@42 1595 T8p = FNMS(T8n, T8o, T8l * T8m);
Chris@42 1596 T8v = FMA(T8n, T8m, T8l * T8o);
Chris@42 1597 T8q = W[44];
Chris@42 1598 T8s = W[45];
Chris@42 1599 T8u = FMA(T8q, T8r, T8s * T8t);
Chris@42 1600 T8w = FNMS(T8s, T8r, T8q * T8t);
Chris@42 1601 }
Chris@42 1602 Rp[WS(rs, 11)] = T8p - T8u;
Chris@42 1603 Ip[WS(rs, 11)] = T8v + T8w;
Chris@42 1604 Rm[WS(rs, 11)] = T8p + T8u;
Chris@42 1605 Im[WS(rs, 11)] = T8w - T8v;
Chris@42 1606 }
Chris@42 1607 {
Chris@42 1608 E T8F, T8P, T8O, T8Q;
Chris@42 1609 {
Chris@42 1610 E T8x, T8B, T8G, T8K;
Chris@42 1611 T8x = W[26];
Chris@42 1612 T8B = W[27];
Chris@42 1613 T8F = FNMS(T8B, T8E, T8x * T8A);
Chris@42 1614 T8P = FMA(T8B, T8A, T8x * T8E);
Chris@42 1615 T8G = W[28];
Chris@42 1616 T8K = W[29];
Chris@42 1617 T8O = FMA(T8G, T8J, T8K * T8N);
Chris@42 1618 T8Q = FNMS(T8K, T8J, T8G * T8N);
Chris@42 1619 }
Chris@42 1620 Rp[WS(rs, 7)] = T8F - T8O;
Chris@42 1621 Ip[WS(rs, 7)] = T8P + T8Q;
Chris@42 1622 Rm[WS(rs, 7)] = T8F + T8O;
Chris@42 1623 Im[WS(rs, 7)] = T8Q - T8P;
Chris@42 1624 }
Chris@42 1625 }
/* Output stage C: stores indices 2, 14, 10 and 6. */
Chris@42 1626 {
Chris@42 1627 E T4k, T4S, T4s, T4U, T5a, T5q, T56, T5o, T4N, T5t, T4Z, T5f, T4F, T5v, T4X;
Chris@42 1628 E T5j;
Chris@42 1629 {
Chris@42 1630 E T4c, T4j, T54, T55;
Chris@42 1631 T4c = T4a + T4b;
Chris@42 1632 T4j = KP707106781 * (T4f + T4i);
Chris@42 1633 T4k = T4c + T4j;
Chris@42 1634 T4S = T4c - T4j;
Chris@42 1635 {
Chris@42 1636 E T4o, T4r, T58, T59;
Chris@42 1637 T4o = KP707106781 * (T4m + T4n);
Chris@42 1638 T4r = T4p + T4q;
Chris@42 1639 T4s = T4o + T4r;
Chris@42 1640 T4U = T4r - T4o;
Chris@42 1641 T58 = KP707106781 * (T4f - T4i);
Chris@42 1642 T59 = T4q - T4p;
Chris@42 1643 T5a = T58 + T59;
Chris@42 1644 T5q = T59 - T58;
Chris@42 1645 }
Chris@42 1646 T54 = T4a - T4b;
Chris@42 1647 T55 = KP707106781 * (T4n - T4m);
Chris@42 1648 T56 = T54 + T55;
Chris@42 1649 T5o = T54 - T55;
Chris@42 1650 {
Chris@42 1651 E T4M, T5d, T4J, T5e, T4H, T4I;
Chris@42 1652 T4M = T4K + T4L;
Chris@42 1653 T5d = T4v - T4w;
Chris@42 1654 T4H = FNMS(KP831469612, T4y, KP555570233 * T4z);
Chris@42 1655 T4I = FMA(KP831469612, T4B, KP555570233 * T4C);
Chris@42 1656 T4J = T4H + T4I;
Chris@42 1657 T5e = T4H - T4I;
Chris@42 1658 T4N = T4J + T4M;
Chris@42 1659 T5t = T5d - T5e;
Chris@42 1660 T4Z = T4M - T4J;
Chris@42 1661 T5f = T5d + T5e;
Chris@42 1662 }
Chris@42 1663 {
Chris@42 1664 E T4x, T5i, T4E, T5h, T4A, T4D;
Chris@42 1665 T4x = T4v + T4w;
Chris@42 1666 T5i = T4L - T4K;
Chris@42 1667 T4A = FMA(KP555570233, T4y, KP831469612 * T4z);
Chris@42 1668 T4D = FNMS(KP831469612, T4C, KP555570233 * T4B);
Chris@42 1669 T4E = T4A + T4D;
Chris@42 1670 T5h = T4D - T4A;
Chris@42 1671 T4F = T4x + T4E;
Chris@42 1672 T5v = T5i - T5h;
Chris@42 1673 T4X = T4x - T4E;
Chris@42 1674 T5j = T5h + T5i;
Chris@42 1675 }
Chris@42 1676 }
Chris@42 1677 {
Chris@42 1678 E T4t, T4P, T4O, T4Q;
Chris@42 1679 {
Chris@42 1680 E T49, T4l, T4u, T4G;
Chris@42 1681 T49 = W[6];
Chris@42 1682 T4l = W[7];
Chris@42 1683 T4t = FNMS(T4l, T4s, T49 * T4k);
Chris@42 1684 T4P = FMA(T4l, T4k, T49 * T4s);
Chris@42 1685 T4u = W[8];
Chris@42 1686 T4G = W[9];
Chris@42 1687 T4O = FMA(T4u, T4F, T4G * T4N);
Chris@42 1688 T4Q = FNMS(T4G, T4F, T4u * T4N);
Chris@42 1689 }
Chris@42 1690 Rp[WS(rs, 2)] = T4t - T4O;
Chris@42 1691 Ip[WS(rs, 2)] = T4P + T4Q;
Chris@42 1692 Rm[WS(rs, 2)] = T4t + T4O;
Chris@42 1693 Im[WS(rs, 2)] = T4Q - T4P;
Chris@42 1694 }
Chris@42 1695 {
Chris@42 1696 E T5r, T5x, T5w, T5y;
Chris@42 1697 {
Chris@42 1698 E T5n, T5p, T5s, T5u;
Chris@42 1699 T5n = W[54];
Chris@42 1700 T5p = W[55];
Chris@42 1701 T5r = FNMS(T5p, T5q, T5n * T5o);
Chris@42 1702 T5x = FMA(T5p, T5o, T5n * T5q);
Chris@42 1703 T5s = W[56];
Chris@42 1704 T5u = W[57];
Chris@42 1705 T5w = FMA(T5s, T5t, T5u * T5v);
Chris@42 1706 T5y = FNMS(T5u, T5t, T5s * T5v);
Chris@42 1707 }
Chris@42 1708 Rp[WS(rs, 14)] = T5r - T5w;
Chris@42 1709 Ip[WS(rs, 14)] = T5x + T5y;
Chris@42 1710 Rm[WS(rs, 14)] = T5r + T5w;
Chris@42 1711 Im[WS(rs, 14)] = T5y - T5x;
Chris@42 1712 }
Chris@42 1713 {
Chris@42 1714 E T4V, T51, T50, T52;
Chris@42 1715 {
Chris@42 1716 E T4R, T4T, T4W, T4Y;
Chris@42 1717 T4R = W[38];
Chris@42 1718 T4T = W[39];
Chris@42 1719 T4V = FNMS(T4T, T4U, T4R * T4S);
Chris@42 1720 T51 = FMA(T4T, T4S, T4R * T4U);
Chris@42 1721 T4W = W[40];
Chris@42 1722 T4Y = W[41];
Chris@42 1723 T50 = FMA(T4W, T4X, T4Y * T4Z);
Chris@42 1724 T52 = FNMS(T4Y, T4X, T4W * T4Z);
Chris@42 1725 }
Chris@42 1726 Rp[WS(rs, 10)] = T4V - T50;
Chris@42 1727 Ip[WS(rs, 10)] = T51 + T52;
Chris@42 1728 Rm[WS(rs, 10)] = T4V + T50;
Chris@42 1729 Im[WS(rs, 10)] = T52 - T51;
Chris@42 1730 }
Chris@42 1731 {
Chris@42 1732 E T5b, T5l, T5k, T5m;
Chris@42 1733 {
Chris@42 1734 E T53, T57, T5c, T5g;
Chris@42 1735 T53 = W[22];
Chris@42 1736 T57 = W[23];
Chris@42 1737 T5b = FNMS(T57, T5a, T53 * T56);
Chris@42 1738 T5l = FMA(T57, T56, T53 * T5a);
Chris@42 1739 T5c = W[24];
Chris@42 1740 T5g = W[25];
Chris@42 1741 T5k = FMA(T5c, T5f, T5g * T5j);
Chris@42 1742 T5m = FNMS(T5g, T5f, T5c * T5j);
Chris@42 1743 }
Chris@42 1744 Rp[WS(rs, 6)] = T5b - T5k;
Chris@42 1745 Ip[WS(rs, 6)] = T5l + T5m;
Chris@42 1746 Rm[WS(rs, 6)] = T5b + T5k;
Chris@42 1747 Im[WS(rs, 6)] = T5m - T5l;
Chris@42 1748 }
Chris@42 1749 }
/* Output stage D: stores indices 1, 13, 9 and 5. */
Chris@42 1750 {
Chris@42 1751 E T60, T6W, T6c, T6Y, T7e, T7u, T7a, T7s, T6R, T7x, T73, T7j, T6F, T7z, T71;
Chris@42 1752 E T7n;
Chris@42 1753 {
Chris@42 1754 E T5K, T5Z, T78, T79;
Chris@42 1755 T5K = T5C + T5J;
Chris@42 1756 T5Z = T5R + T5Y;
Chris@42 1757 T60 = T5K + T5Z;
Chris@42 1758 T6W = T5K - T5Z;
Chris@42 1759 {
Chris@42 1760 E T64, T6b, T7c, T7d;
Chris@42 1761 T64 = T62 + T63;
Chris@42 1762 T6b = T67 + T6a;
Chris@42 1763 T6c = T64 + T6b;
Chris@42 1764 T6Y = T6b - T64;
Chris@42 1765 T7c = T5R - T5Y;
Chris@42 1766 T7d = T6a - T67;
Chris@42 1767 T7e = T7c + T7d;
Chris@42 1768 T7u = T7d - T7c;
Chris@42 1769 }
Chris@42 1770 T78 = T5C - T5J;
Chris@42 1771 T79 = T63 - T62;
Chris@42 1772 T7a = T78 + T79;
Chris@42 1773 T7s = T78 - T79;
Chris@42 1774 {
Chris@42 1775 E T6Q, T7h, T6J, T7i, T6H, T6I;
Chris@42 1776 T6Q = T6M + T6P;
Chris@42 1777 T7h = T6h - T6o;
Chris@42 1778 T6H = FNMS(KP555570233, T6s, KP831469612 * T6v);
Chris@42 1779 T6I = FMA(KP555570233, T6z, KP831469612 * T6C);
Chris@42 1780 T6J = T6H + T6I;
Chris@42 1781 T7i = T6H - T6I;
Chris@42 1782 T6R = T6J + T6Q;
Chris@42 1783 T7x = T7h - T7i;
Chris@42 1784 T73 = T6Q - T6J;
Chris@42 1785 T7j = T7h + T7i;
Chris@42 1786 }
Chris@42 1787 {
Chris@42 1788 E T6p, T7m, T6E, T7l, T6w, T6D;
Chris@42 1789 T6p = T6h + T6o;
Chris@42 1790 T7m = T6P - T6M;
Chris@42 1791 T6w = FMA(KP831469612, T6s, KP555570233 * T6v);
Chris@42 1792 T6D = FNMS(KP555570233, T6C, KP831469612 * T6z);
Chris@42 1793 T6E = T6w + T6D;
Chris@42 1794 T7l = T6D - T6w;
Chris@42 1795 T6F = T6p + T6E;
Chris@42 1796 T7z = T7m - T7l;
Chris@42 1797 T71 = T6p - T6E;
Chris@42 1798 T7n = T7l + T7m;
Chris@42 1799 }
Chris@42 1800 }
Chris@42 1801 {
Chris@42 1802 E T6d, T6T, T6S, T6U;
Chris@42 1803 {
Chris@42 1804 E T5z, T61, T6e, T6G;
Chris@42 1805 T5z = W[2];
Chris@42 1806 T61 = W[3];
Chris@42 1807 T6d = FNMS(T61, T6c, T5z * T60);
Chris@42 1808 T6T = FMA(T61, T60, T5z * T6c);
Chris@42 1809 T6e = W[4];
Chris@42 1810 T6G = W[5];
Chris@42 1811 T6S = FMA(T6e, T6F, T6G * T6R);
Chris@42 1812 T6U = FNMS(T6G, T6F, T6e * T6R);
Chris@42 1813 }
Chris@42 1814 Rp[WS(rs, 1)] = T6d - T6S;
Chris@42 1815 Ip[WS(rs, 1)] = T6T + T6U;
Chris@42 1816 Rm[WS(rs, 1)] = T6d + T6S;
Chris@42 1817 Im[WS(rs, 1)] = T6U - T6T;
Chris@42 1818 }
Chris@42 1819 {
Chris@42 1820 E T7v, T7B, T7A, T7C;
Chris@42 1821 {
Chris@42 1822 E T7r, T7t, T7w, T7y;
Chris@42 1823 T7r = W[50];
Chris@42 1824 T7t = W[51];
Chris@42 1825 T7v = FNMS(T7t, T7u, T7r * T7s);
Chris@42 1826 T7B = FMA(T7t, T7s, T7r * T7u);
Chris@42 1827 T7w = W[52];
Chris@42 1828 T7y = W[53];
Chris@42 1829 T7A = FMA(T7w, T7x, T7y * T7z);
Chris@42 1830 T7C = FNMS(T7y, T7x, T7w * T7z);
Chris@42 1831 }
Chris@42 1832 Rp[WS(rs, 13)] = T7v - T7A;
Chris@42 1833 Ip[WS(rs, 13)] = T7B + T7C;
Chris@42 1834 Rm[WS(rs, 13)] = T7v + T7A;
Chris@42 1835 Im[WS(rs, 13)] = T7C - T7B;
Chris@42 1836 }
Chris@42 1837 {
Chris@42 1838 E T6Z, T75, T74, T76;
Chris@42 1839 {
Chris@42 1840 E T6V, T6X, T70, T72;
Chris@42 1841 T6V = W[34];
Chris@42 1842 T6X = W[35];
Chris@42 1843 T6Z = FNMS(T6X, T6Y, T6V * T6W);
Chris@42 1844 T75 = FMA(T6X, T6W, T6V * T6Y);
Chris@42 1845 T70 = W[36];
Chris@42 1846 T72 = W[37];
Chris@42 1847 T74 = FMA(T70, T71, T72 * T73);
Chris@42 1848 T76 = FNMS(T72, T71, T70 * T73);
Chris@42 1849 }
Chris@42 1850 Rp[WS(rs, 9)] = T6Z - T74;
Chris@42 1851 Ip[WS(rs, 9)] = T75 + T76;
Chris@42 1852 Rm[WS(rs, 9)] = T6Z + T74;
Chris@42 1853 Im[WS(rs, 9)] = T76 - T75;
Chris@42 1854 }
Chris@42 1855 {
Chris@42 1856 E T7f, T7p, T7o, T7q;
Chris@42 1857 {
Chris@42 1858 E T77, T7b, T7g, T7k;
Chris@42 1859 T77 = W[18];
Chris@42 1860 T7b = W[19];
Chris@42 1861 T7f = FNMS(T7b, T7e, T77 * T7a);
Chris@42 1862 T7p = FMA(T7b, T7a, T77 * T7e);
Chris@42 1863 T7g = W[20];
Chris@42 1864 T7k = W[21];
Chris@42 1865 T7o = FMA(T7g, T7j, T7k * T7n);
Chris@42 1866 T7q = FNMS(T7k, T7j, T7g * T7n);
Chris@42 1867 }
Chris@42 1868 Rp[WS(rs, 5)] = T7f - T7o;
Chris@42 1869 Ip[WS(rs, 5)] = T7p + T7q;
Chris@42 1870 Rm[WS(rs, 5)] = T7f + T7o;
Chris@42 1871 Im[WS(rs, 5)] = T7q - T7p;
Chris@42 1872 }
Chris@42 1873 }
Chris@42 1874 }
Chris@42 1875 }
Chris@42 1876 }
Chris@42 1877
/*
 * Twiddle instruction list referenced by the codelet descriptor `desc`
 * (non-FMA variant): a TW_FULL entry with arguments (1, 32) followed by a
 * TW_NEXT entry with arguments (1, 0).
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for
 * n = 32 and TW_NEXT ends the list -- confirm against the tw_instr definition.
 */
Chris@42 1878 static const tw_instr twinstr[] = {
Chris@42 1879 {TW_FULL, 1, 32},
Chris@42 1880 {TW_NEXT, 1, 0}
Chris@42 1881 };
Chris@42 1882
/*
 * Descriptor for the non-FMA build of this codelet: size 32, the name string,
 * the twiddle instruction list, &GENUS, and the operation counts
 * {404, 114, 94, 0}. The first three counts equal the additions /
 * multiplications / fused multiply-adds reported in the generator's comment
 * for this variant.
 * NOTE(review): the meaning of the fourth count (0) is not visible here.
 */
Chris@42 1883 static const hc2c_desc desc = { 32, "hc2cbdft_32", twinstr, &GENUS, {404, 114, 94, 0} };
Chris@42 1884
/*
 * Registration entry point (non-FMA variant): hands the hc2cbdft_32 function
 * and its descriptor to planner `p` through X(khc2c_register), tagged with
 * HC2C_VIA_DFT.
 */
Chris@42 1885 void X(codelet_hc2cbdft_32) (planner *p) {
Chris@42 1886 X(khc2c_register) (p, hc2cbdft_32, &desc, HC2C_VIA_DFT);
Chris@42 1887 }
Chris@42 1888 #endif /* HAVE_FMA */