annotate src/fftw-3.3.8/rdft/scalar/r2cf/hf_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:30 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include rdft/scalar/hf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@82 33 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hf.h"
Chris@82 36
/*
 * hf_32 (FMA variant): genfft-generated 32-point codelet; the generator
 * options (-n 32 -dit -fma) are listed in the "Generated by" comment above.
 *
 * cr, ci : data arrays; each iteration reads cr[WS(rs, k)] and ci[WS(rs, k)]
 *          for k = 0..31 and then overwrites all 32 slots of both arrays.
 * W      : table of multipliers; 62 values are consumed per iteration and the
 *          pointer starts at offset (mb - 1) * 62.
 * rs     : stride between the 32 elements, applied through WS().
 * mb, me : the loop runs for m = mb .. me - 1.
 * ms     : per-iteration step; cr advances by +ms while ci moves by -ms.
 *
 * Element k (k = 1..31) is combined with the pair W[2k-2], W[2k-1];
 * element 0 is used unmodified.
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE
 * come from project headers that are not visible here.  The comments below
 * assume FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and FMS(a,b,c) = a*b - c
 * -- confirm against the headers.
 */
Chris@82 37 static void hf_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Constants; the trailing notes give the closed form each literal matches numerically. */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
Chris@82 40 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
Chris@82 41 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
Chris@82 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* tan(pi/16) */
Chris@82 43 DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* tan(3*pi/16) */
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* tan(pi/8) = sqrt(2) - 1 */
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* 1/sqrt(2) */
Chris@82 46 {
Chris@82 47 INT m;
/* One pass per m: cr steps forward by ms, ci steps backward by ms, W by 62 entries. */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
/* Intermediate values that are shared between the load phase and the output phase. */
Chris@82 49 E T8, T8y, T3w, T87, Tl, T8x, T3B, T83, Tz, T6G, T3J, T5T, TM, T6F, T3Q;
Chris@82 50 E T5U, T11, T1e, T6J, T6K, T6L, T6M, T3Z, T5Y, T46, T5X, T1s, T1F, T6O, T6P;
Chris@82 51 E T6Q, T6R, T4e, T61, T4l, T60, T32, T76, T7d, T7N, T54, T6c, T5r, T6f, T29;
Chris@82 52 E T6V, T72, T7I, T4v, T65, T4S, T68, T3t, T7e, T79, T7O, T5b, T5t, T5i, T5s;
Chris@82 53 E T2A, T73, T6Y, T7J, T4C, T4T, T4J, T4U;
/* Elements 0 and 16: element 0 is taken as-is, element 16 is combined with W[30], W[31]. */
Chris@82 54 {
Chris@82 55 E T1, T86, T3, T6, T4, T84, T2, T7, T85, T5;
Chris@82 56 T1 = cr[0];
Chris@82 57 T86 = ci[0];
Chris@82 58 T3 = cr[WS(rs, 16)];
Chris@82 59 T6 = ci[WS(rs, 16)];
Chris@82 60 T2 = W[30];
Chris@82 61 T4 = T2 * T3;
Chris@82 62 T84 = T2 * T6;
Chris@82 63 T5 = W[31];
Chris@82 64 T7 = FMA(T5, T6, T4);
Chris@82 65 T85 = FNMS(T5, T3, T84);
Chris@82 66 T8 = T1 + T7;
Chris@82 67 T8y = T86 - T85;
Chris@82 68 T3w = T1 - T7;
Chris@82 69 T87 = T85 + T86;
Chris@82 70 }
/* Elements 8 and 24 (W[14..15], W[46..47]); their sums and differences are kept. */
Chris@82 71 {
Chris@82 72 E Ta, Td, Tb, T3x, Tg, Tj, Th, T3z, T9, Tf;
Chris@82 73 Ta = cr[WS(rs, 8)];
Chris@82 74 Td = ci[WS(rs, 8)];
Chris@82 75 T9 = W[14];
Chris@82 76 Tb = T9 * Ta;
Chris@82 77 T3x = T9 * Td;
Chris@82 78 Tg = cr[WS(rs, 24)];
Chris@82 79 Tj = ci[WS(rs, 24)];
Chris@82 80 Tf = W[46];
Chris@82 81 Th = Tf * Tg;
Chris@82 82 T3z = Tf * Tj;
Chris@82 83 {
Chris@82 84 E Te, T3y, Tk, T3A, Tc, Ti;
Chris@82 85 Tc = W[15];
Chris@82 86 Te = FMA(Tc, Td, Tb);
Chris@82 87 T3y = FNMS(Tc, Ta, T3x);
Chris@82 88 Ti = W[47];
Chris@82 89 Tk = FMA(Ti, Tj, Th);
Chris@82 90 T3A = FNMS(Ti, Tg, T3z);
Chris@82 91 Tl = Te + Tk;
Chris@82 92 T8x = Te - Tk;
Chris@82 93 T3B = T3y - T3A;
Chris@82 94 T83 = T3y + T3A;
Chris@82 95 }
Chris@82 96 }
/* Elements 4 and 20 (W[6..7], W[38..39]). */
Chris@82 97 {
Chris@82 98 E Ts, T3F, Ty, T3H, T3D, T3I;
Chris@82 99 {
Chris@82 100 E To, Tr, Tp, T3E, Tn, Tq;
Chris@82 101 To = cr[WS(rs, 4)];
Chris@82 102 Tr = ci[WS(rs, 4)];
Chris@82 103 Tn = W[6];
Chris@82 104 Tp = Tn * To;
Chris@82 105 T3E = Tn * Tr;
Chris@82 106 Tq = W[7];
Chris@82 107 Ts = FMA(Tq, Tr, Tp);
Chris@82 108 T3F = FNMS(Tq, To, T3E);
Chris@82 109 }
Chris@82 110 {
Chris@82 111 E Tu, Tx, Tv, T3G, Tt, Tw;
Chris@82 112 Tu = cr[WS(rs, 20)];
Chris@82 113 Tx = ci[WS(rs, 20)];
Chris@82 114 Tt = W[38];
Chris@82 115 Tv = Tt * Tu;
Chris@82 116 T3G = Tt * Tx;
Chris@82 117 Tw = W[39];
Chris@82 118 Ty = FMA(Tw, Tx, Tv);
Chris@82 119 T3H = FNMS(Tw, Tu, T3G);
Chris@82 120 }
Chris@82 121 Tz = Ts + Ty;
Chris@82 122 T6G = T3F + T3H;
Chris@82 123 T3D = Ts - Ty;
Chris@82 124 T3I = T3F - T3H;
Chris@82 125 T3J = T3D - T3I;
Chris@82 126 T5T = T3D + T3I;
Chris@82 127 }
/* Elements 28 and 12 (W[54..55], W[22..23]). */
Chris@82 128 {
Chris@82 129 E TF, T3M, TL, T3O, T3K, T3P;
Chris@82 130 {
Chris@82 131 E TB, TE, TC, T3L, TA, TD;
Chris@82 132 TB = cr[WS(rs, 28)];
Chris@82 133 TE = ci[WS(rs, 28)];
Chris@82 134 TA = W[54];
Chris@82 135 TC = TA * TB;
Chris@82 136 T3L = TA * TE;
Chris@82 137 TD = W[55];
Chris@82 138 TF = FMA(TD, TE, TC);
Chris@82 139 T3M = FNMS(TD, TB, T3L);
Chris@82 140 }
Chris@82 141 {
Chris@82 142 E TH, TK, TI, T3N, TG, TJ;
Chris@82 143 TH = cr[WS(rs, 12)];
Chris@82 144 TK = ci[WS(rs, 12)];
Chris@82 145 TG = W[22];
Chris@82 146 TI = TG * TH;
Chris@82 147 T3N = TG * TK;
Chris@82 148 TJ = W[23];
Chris@82 149 TL = FMA(TJ, TK, TI);
Chris@82 150 T3O = FNMS(TJ, TH, T3N);
Chris@82 151 }
Chris@82 152 TM = TF + TL;
Chris@82 153 T6F = T3M + T3O;
Chris@82 154 T3K = TF - TL;
Chris@82 155 T3P = T3M - T3O;
Chris@82 156 T3Q = T3K + T3P;
Chris@82 157 T5U = T3K - T3P;
Chris@82 158 }
/* Elements 2, 26, 18 and 10 (W[2..3], W[50..51], W[34..35], W[18..19]). */
Chris@82 159 {
Chris@82 160 E TU, T3U, T1d, T44, T10, T3W, T17, T42;
Chris@82 161 {
Chris@82 162 E TQ, TT, TR, T3T, TP, TS;
Chris@82 163 TQ = cr[WS(rs, 2)];
Chris@82 164 TT = ci[WS(rs, 2)];
Chris@82 165 TP = W[2];
Chris@82 166 TR = TP * TQ;
Chris@82 167 T3T = TP * TT;
Chris@82 168 TS = W[3];
Chris@82 169 TU = FMA(TS, TT, TR);
Chris@82 170 T3U = FNMS(TS, TQ, T3T);
Chris@82 171 }
Chris@82 172 {
Chris@82 173 E T19, T1c, T1a, T43, T18, T1b;
Chris@82 174 T19 = cr[WS(rs, 26)];
Chris@82 175 T1c = ci[WS(rs, 26)];
Chris@82 176 T18 = W[50];
Chris@82 177 T1a = T18 * T19;
Chris@82 178 T43 = T18 * T1c;
Chris@82 179 T1b = W[51];
Chris@82 180 T1d = FMA(T1b, T1c, T1a);
Chris@82 181 T44 = FNMS(T1b, T19, T43);
Chris@82 182 }
Chris@82 183 {
Chris@82 184 E TW, TZ, TX, T3V, TV, TY;
Chris@82 185 TW = cr[WS(rs, 18)];
Chris@82 186 TZ = ci[WS(rs, 18)];
Chris@82 187 TV = W[34];
Chris@82 188 TX = TV * TW;
Chris@82 189 T3V = TV * TZ;
Chris@82 190 TY = W[35];
Chris@82 191 T10 = FMA(TY, TZ, TX);
Chris@82 192 T3W = FNMS(TY, TW, T3V);
Chris@82 193 }
Chris@82 194 {
Chris@82 195 E T13, T16, T14, T41, T12, T15;
Chris@82 196 T13 = cr[WS(rs, 10)];
Chris@82 197 T16 = ci[WS(rs, 10)];
Chris@82 198 T12 = W[18];
Chris@82 199 T14 = T12 * T13;
Chris@82 200 T41 = T12 * T16;
Chris@82 201 T15 = W[19];
Chris@82 202 T17 = FMA(T15, T16, T14);
Chris@82 203 T42 = FNMS(T15, T13, T41);
Chris@82 204 }
Chris@82 205 T11 = TU + T10;
Chris@82 206 T1e = T17 + T1d;
Chris@82 207 T6J = T11 - T1e;
Chris@82 208 T6K = T3U + T3W;
Chris@82 209 T6L = T42 + T44;
Chris@82 210 T6M = T6K - T6L;
Chris@82 211 {
Chris@82 212 E T3X, T3Y, T40, T45;
Chris@82 213 T3X = T3U - T3W;
Chris@82 214 T3Y = T17 - T1d;
Chris@82 215 T3Z = T3X + T3Y;
Chris@82 216 T5Y = T3X - T3Y;
Chris@82 217 T40 = TU - T10;
Chris@82 218 T45 = T42 - T44;
Chris@82 219 T46 = T40 - T45;
Chris@82 220 T5X = T40 + T45;
Chris@82 221 }
Chris@82 222 }
/* Elements 30, 22, 14 and 6 (W[58..59], W[42..43], W[26..27], W[10..11]). */
Chris@82 223 {
Chris@82 224 E T1l, T49, T1E, T4j, T1r, T4b, T1y, T4h;
Chris@82 225 {
Chris@82 226 E T1h, T1k, T1i, T48, T1g, T1j;
Chris@82 227 T1h = cr[WS(rs, 30)];
Chris@82 228 T1k = ci[WS(rs, 30)];
Chris@82 229 T1g = W[58];
Chris@82 230 T1i = T1g * T1h;
Chris@82 231 T48 = T1g * T1k;
Chris@82 232 T1j = W[59];
Chris@82 233 T1l = FMA(T1j, T1k, T1i);
Chris@82 234 T49 = FNMS(T1j, T1h, T48);
Chris@82 235 }
Chris@82 236 {
Chris@82 237 E T1A, T1D, T1B, T4i, T1z, T1C;
Chris@82 238 T1A = cr[WS(rs, 22)];
Chris@82 239 T1D = ci[WS(rs, 22)];
Chris@82 240 T1z = W[42];
Chris@82 241 T1B = T1z * T1A;
Chris@82 242 T4i = T1z * T1D;
Chris@82 243 T1C = W[43];
Chris@82 244 T1E = FMA(T1C, T1D, T1B);
Chris@82 245 T4j = FNMS(T1C, T1A, T4i);
Chris@82 246 }
Chris@82 247 {
Chris@82 248 E T1n, T1q, T1o, T4a, T1m, T1p;
Chris@82 249 T1n = cr[WS(rs, 14)];
Chris@82 250 T1q = ci[WS(rs, 14)];
Chris@82 251 T1m = W[26];
Chris@82 252 T1o = T1m * T1n;
Chris@82 253 T4a = T1m * T1q;
Chris@82 254 T1p = W[27];
Chris@82 255 T1r = FMA(T1p, T1q, T1o);
Chris@82 256 T4b = FNMS(T1p, T1n, T4a);
Chris@82 257 }
Chris@82 258 {
Chris@82 259 E T1u, T1x, T1v, T4g, T1t, T1w;
Chris@82 260 T1u = cr[WS(rs, 6)];
Chris@82 261 T1x = ci[WS(rs, 6)];
Chris@82 262 T1t = W[10];
Chris@82 263 T1v = T1t * T1u;
Chris@82 264 T4g = T1t * T1x;
Chris@82 265 T1w = W[11];
Chris@82 266 T1y = FMA(T1w, T1x, T1v);
Chris@82 267 T4h = FNMS(T1w, T1u, T4g);
Chris@82 268 }
Chris@82 269 T1s = T1l + T1r;
Chris@82 270 T1F = T1y + T1E;
Chris@82 271 T6O = T1s - T1F;
Chris@82 272 T6P = T49 + T4b;
Chris@82 273 T6Q = T4h + T4j;
Chris@82 274 T6R = T6P - T6Q;
Chris@82 275 {
Chris@82 276 E T4c, T4d, T4f, T4k;
Chris@82 277 T4c = T49 - T4b;
Chris@82 278 T4d = T1y - T1E;
Chris@82 279 T4e = T4c + T4d;
Chris@82 280 T61 = T4c - T4d;
Chris@82 281 T4f = T1l - T1r;
Chris@82 282 T4k = T4h - T4j;
Chris@82 283 T4l = T4f - T4k;
Chris@82 284 T60 = T4f + T4k;
Chris@82 285 }
Chris@82 286 }
/* Elements 31, 23, 15 and 7 (W[60..61], W[44..45], W[28..29], W[12..13]). */
Chris@82 287 {
Chris@82 288 E T2H, T5n, T30, T52, T2N, T5p, T2U, T50;
Chris@82 289 {
Chris@82 290 E T2D, T2G, T2E, T5m, T2C, T2F;
Chris@82 291 T2D = cr[WS(rs, 31)];
Chris@82 292 T2G = ci[WS(rs, 31)];
Chris@82 293 T2C = W[60];
Chris@82 294 T2E = T2C * T2D;
Chris@82 295 T5m = T2C * T2G;
Chris@82 296 T2F = W[61];
Chris@82 297 T2H = FMA(T2F, T2G, T2E);
Chris@82 298 T5n = FNMS(T2F, T2D, T5m);
Chris@82 299 }
Chris@82 300 {
Chris@82 301 E T2W, T2Z, T2X, T51, T2V, T2Y;
Chris@82 302 T2W = cr[WS(rs, 23)];
Chris@82 303 T2Z = ci[WS(rs, 23)];
Chris@82 304 T2V = W[44];
Chris@82 305 T2X = T2V * T2W;
Chris@82 306 T51 = T2V * T2Z;
Chris@82 307 T2Y = W[45];
Chris@82 308 T30 = FMA(T2Y, T2Z, T2X);
Chris@82 309 T52 = FNMS(T2Y, T2W, T51);
Chris@82 310 }
Chris@82 311 {
Chris@82 312 E T2J, T2M, T2K, T5o, T2I, T2L;
Chris@82 313 T2J = cr[WS(rs, 15)];
Chris@82 314 T2M = ci[WS(rs, 15)];
Chris@82 315 T2I = W[28];
Chris@82 316 T2K = T2I * T2J;
Chris@82 317 T5o = T2I * T2M;
Chris@82 318 T2L = W[29];
Chris@82 319 T2N = FMA(T2L, T2M, T2K);
Chris@82 320 T5p = FNMS(T2L, T2J, T5o);
Chris@82 321 }
Chris@82 322 {
Chris@82 323 E T2Q, T2T, T2R, T4Z, T2P, T2S;
Chris@82 324 T2Q = cr[WS(rs, 7)];
Chris@82 325 T2T = ci[WS(rs, 7)];
Chris@82 326 T2P = W[12];
Chris@82 327 T2R = T2P * T2Q;
Chris@82 328 T4Z = T2P * T2T;
Chris@82 329 T2S = W[13];
Chris@82 330 T2U = FMA(T2S, T2T, T2R);
Chris@82 331 T50 = FNMS(T2S, T2Q, T4Z);
Chris@82 332 }
Chris@82 333 {
Chris@82 334 E T2O, T31, T7b, T7c;
Chris@82 335 T2O = T2H + T2N;
Chris@82 336 T31 = T2U + T30;
Chris@82 337 T32 = T2O + T31;
Chris@82 338 T76 = T2O - T31;
Chris@82 339 T7b = T5n + T5p;
Chris@82 340 T7c = T50 + T52;
Chris@82 341 T7d = T7b - T7c;
Chris@82 342 T7N = T7b + T7c;
Chris@82 343 }
Chris@82 344 {
Chris@82 345 E T4Y, T53, T5l, T5q;
Chris@82 346 T4Y = T2H - T2N;
Chris@82 347 T53 = T50 - T52;
Chris@82 348 T54 = T4Y - T53;
Chris@82 349 T6c = T4Y + T53;
Chris@82 350 T5l = T30 - T2U;
Chris@82 351 T5q = T5n - T5p;
Chris@82 352 T5r = T5l - T5q;
Chris@82 353 T6f = T5q + T5l;
Chris@82 354 }
Chris@82 355 }
/* Elements 1, 25, 17 and 9 (W[0..1], W[48..49], W[32..33], W[16..17]). */
Chris@82 356 {
Chris@82 357 E T1O, T4N, T27, T4t, T1U, T4P, T21, T4r;
Chris@82 358 {
Chris@82 359 E T1K, T1N, T1L, T4M, T1J, T1M;
Chris@82 360 T1K = cr[WS(rs, 1)];
Chris@82 361 T1N = ci[WS(rs, 1)];
Chris@82 362 T1J = W[0];
Chris@82 363 T1L = T1J * T1K;
Chris@82 364 T4M = T1J * T1N;
Chris@82 365 T1M = W[1];
Chris@82 366 T1O = FMA(T1M, T1N, T1L);
Chris@82 367 T4N = FNMS(T1M, T1K, T4M);
Chris@82 368 }
Chris@82 369 {
Chris@82 370 E T23, T26, T24, T4s, T22, T25;
Chris@82 371 T23 = cr[WS(rs, 25)];
Chris@82 372 T26 = ci[WS(rs, 25)];
Chris@82 373 T22 = W[48];
Chris@82 374 T24 = T22 * T23;
Chris@82 375 T4s = T22 * T26;
Chris@82 376 T25 = W[49];
Chris@82 377 T27 = FMA(T25, T26, T24);
Chris@82 378 T4t = FNMS(T25, T23, T4s);
Chris@82 379 }
Chris@82 380 {
Chris@82 381 E T1Q, T1T, T1R, T4O, T1P, T1S;
Chris@82 382 T1Q = cr[WS(rs, 17)];
Chris@82 383 T1T = ci[WS(rs, 17)];
Chris@82 384 T1P = W[32];
Chris@82 385 T1R = T1P * T1Q;
Chris@82 386 T4O = T1P * T1T;
Chris@82 387 T1S = W[33];
Chris@82 388 T1U = FMA(T1S, T1T, T1R);
Chris@82 389 T4P = FNMS(T1S, T1Q, T4O);
Chris@82 390 }
Chris@82 391 {
Chris@82 392 E T1X, T20, T1Y, T4q, T1W, T1Z;
Chris@82 393 T1X = cr[WS(rs, 9)];
Chris@82 394 T20 = ci[WS(rs, 9)];
Chris@82 395 T1W = W[16];
Chris@82 396 T1Y = T1W * T1X;
Chris@82 397 T4q = T1W * T20;
Chris@82 398 T1Z = W[17];
Chris@82 399 T21 = FMA(T1Z, T20, T1Y);
Chris@82 400 T4r = FNMS(T1Z, T1X, T4q);
Chris@82 401 }
Chris@82 402 {
Chris@82 403 E T1V, T28, T70, T71;
Chris@82 404 T1V = T1O + T1U;
Chris@82 405 T28 = T21 + T27;
Chris@82 406 T29 = T1V + T28;
Chris@82 407 T6V = T1V - T28;
Chris@82 408 T70 = T4N + T4P;
Chris@82 409 T71 = T4r + T4t;
Chris@82 410 T72 = T70 - T71;
Chris@82 411 T7I = T70 + T71;
Chris@82 412 }
Chris@82 413 {
Chris@82 414 E T4p, T4u, T4Q, T4R;
Chris@82 415 T4p = T1O - T1U;
Chris@82 416 T4u = T4r - T4t;
Chris@82 417 T4v = T4p - T4u;
Chris@82 418 T65 = T4p + T4u;
Chris@82 419 T4Q = T4N - T4P;
Chris@82 420 T4R = T21 - T27;
Chris@82 421 T4S = T4Q + T4R;
Chris@82 422 T68 = T4Q - T4R;
Chris@82 423 }
Chris@82 424 }
/* Elements 3, 11, 19 and 27 (W[4..5], W[20..21], W[36..37], W[52..53]). */
Chris@82 425 {
Chris@82 426 E T38, T57, T3r, T5g, T3e, T59, T3l, T5e;
Chris@82 427 {
Chris@82 428 E T34, T37, T35, T56, T33, T36;
Chris@82 429 T34 = cr[WS(rs, 3)];
Chris@82 430 T37 = ci[WS(rs, 3)];
Chris@82 431 T33 = W[4];
Chris@82 432 T35 = T33 * T34;
Chris@82 433 T56 = T33 * T37;
Chris@82 434 T36 = W[5];
Chris@82 435 T38 = FMA(T36, T37, T35);
Chris@82 436 T57 = FNMS(T36, T34, T56);
Chris@82 437 }
Chris@82 438 {
Chris@82 439 E T3n, T3q, T3o, T5f, T3m, T3p;
Chris@82 440 T3n = cr[WS(rs, 11)];
Chris@82 441 T3q = ci[WS(rs, 11)];
Chris@82 442 T3m = W[20];
Chris@82 443 T3o = T3m * T3n;
Chris@82 444 T5f = T3m * T3q;
Chris@82 445 T3p = W[21];
Chris@82 446 T3r = FMA(T3p, T3q, T3o);
Chris@82 447 T5g = FNMS(T3p, T3n, T5f);
Chris@82 448 }
Chris@82 449 {
Chris@82 450 E T3a, T3d, T3b, T58, T39, T3c;
Chris@82 451 T3a = cr[WS(rs, 19)];
Chris@82 452 T3d = ci[WS(rs, 19)];
Chris@82 453 T39 = W[36];
Chris@82 454 T3b = T39 * T3a;
Chris@82 455 T58 = T39 * T3d;
Chris@82 456 T3c = W[37];
Chris@82 457 T3e = FMA(T3c, T3d, T3b);
Chris@82 458 T59 = FNMS(T3c, T3a, T58);
Chris@82 459 }
Chris@82 460 {
Chris@82 461 E T3h, T3k, T3i, T5d, T3g, T3j;
Chris@82 462 T3h = cr[WS(rs, 27)];
Chris@82 463 T3k = ci[WS(rs, 27)];
Chris@82 464 T3g = W[52];
Chris@82 465 T3i = T3g * T3h;
Chris@82 466 T5d = T3g * T3k;
Chris@82 467 T3j = W[53];
Chris@82 468 T3l = FMA(T3j, T3k, T3i);
Chris@82 469 T5e = FNMS(T3j, T3h, T5d);
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E T3f, T3s, T77, T78;
Chris@82 473 T3f = T38 + T3e;
Chris@82 474 T3s = T3l + T3r;
Chris@82 475 T3t = T3f + T3s;
Chris@82 476 T7e = T3s - T3f;
Chris@82 477 T77 = T5e + T5g;
Chris@82 478 T78 = T57 + T59;
Chris@82 479 T79 = T77 - T78;
Chris@82 480 T7O = T78 + T77;
Chris@82 481 }
Chris@82 482 {
Chris@82 483 E T55, T5a, T5c, T5h;
Chris@82 484 T55 = T38 - T3e;
Chris@82 485 T5a = T57 - T59;
Chris@82 486 T5b = T55 - T5a;
Chris@82 487 T5t = T55 + T5a;
Chris@82 488 T5c = T3l - T3r;
Chris@82 489 T5h = T5e - T5g;
Chris@82 490 T5i = T5c + T5h;
Chris@82 491 T5s = T5c - T5h;
Chris@82 492 }
Chris@82 493 }
/* Elements 5, 13, 21 and 29 (W[8..9], W[24..25], W[40..41], W[56..57]). */
Chris@82 494 {
Chris@82 495 E T2f, T4y, T2y, T4H, T2l, T4A, T2s, T4F;
Chris@82 496 {
Chris@82 497 E T2b, T2e, T2c, T4x, T2a, T2d;
Chris@82 498 T2b = cr[WS(rs, 5)];
Chris@82 499 T2e = ci[WS(rs, 5)];
Chris@82 500 T2a = W[8];
Chris@82 501 T2c = T2a * T2b;
Chris@82 502 T4x = T2a * T2e;
Chris@82 503 T2d = W[9];
Chris@82 504 T2f = FMA(T2d, T2e, T2c);
Chris@82 505 T4y = FNMS(T2d, T2b, T4x);
Chris@82 506 }
Chris@82 507 {
Chris@82 508 E T2u, T2x, T2v, T4G, T2t, T2w;
Chris@82 509 T2u = cr[WS(rs, 13)];
Chris@82 510 T2x = ci[WS(rs, 13)];
Chris@82 511 T2t = W[24];
Chris@82 512 T2v = T2t * T2u;
Chris@82 513 T4G = T2t * T2x;
Chris@82 514 T2w = W[25];
Chris@82 515 T2y = FMA(T2w, T2x, T2v);
Chris@82 516 T4H = FNMS(T2w, T2u, T4G);
Chris@82 517 }
Chris@82 518 {
Chris@82 519 E T2h, T2k, T2i, T4z, T2g, T2j;
Chris@82 520 T2h = cr[WS(rs, 21)];
Chris@82 521 T2k = ci[WS(rs, 21)];
Chris@82 522 T2g = W[40];
Chris@82 523 T2i = T2g * T2h;
Chris@82 524 T4z = T2g * T2k;
Chris@82 525 T2j = W[41];
Chris@82 526 T2l = FMA(T2j, T2k, T2i);
Chris@82 527 T4A = FNMS(T2j, T2h, T4z);
Chris@82 528 }
Chris@82 529 {
Chris@82 530 E T2o, T2r, T2p, T4E, T2n, T2q;
Chris@82 531 T2o = cr[WS(rs, 29)];
Chris@82 532 T2r = ci[WS(rs, 29)];
Chris@82 533 T2n = W[56];
Chris@82 534 T2p = T2n * T2o;
Chris@82 535 T4E = T2n * T2r;
Chris@82 536 T2q = W[57];
Chris@82 537 T2s = FMA(T2q, T2r, T2p);
Chris@82 538 T4F = FNMS(T2q, T2o, T4E);
Chris@82 539 }
Chris@82 540 {
Chris@82 541 E T2m, T2z, T6W, T6X;
Chris@82 542 T2m = T2f + T2l;
Chris@82 543 T2z = T2s + T2y;
Chris@82 544 T2A = T2m + T2z;
Chris@82 545 T73 = T2m - T2z;
Chris@82 546 T6W = T4F + T4H;
Chris@82 547 T6X = T4y + T4A;
Chris@82 548 T6Y = T6W - T6X;
Chris@82 549 T7J = T6X + T6W;
Chris@82 550 }
Chris@82 551 {
Chris@82 552 E T4w, T4B, T4D, T4I;
Chris@82 553 T4w = T2f - T2l;
Chris@82 554 T4B = T4y - T4A;
Chris@82 555 T4C = T4w - T4B;
Chris@82 556 T4T = T4w + T4B;
Chris@82 557 T4D = T2s - T2y;
Chris@82 558 T4I = T4F - T4H;
Chris@82 559 T4J = T4D + T4I;
Chris@82 560 T4U = T4I - T4D;
Chris@82 561 }
Chris@82 562 }
/* Output group 1: writes cr[0, 4, 8, ..., 28] and ci[3, 7, 11, ..., 31]. */
Chris@82 563 {
Chris@82 564 E TO, T7C, T7Z, T80, T89, T8e, T1H, T8d, T3v, T8b, T7L, T7T, T7Q, T7U, T7F;
Chris@82 565 E T81;
Chris@82 566 {
Chris@82 567 E Tm, TN, T7X, T7Y;
Chris@82 568 Tm = T8 + Tl;
Chris@82 569 TN = Tz + TM;
Chris@82 570 TO = Tm + TN;
Chris@82 571 T7C = Tm - TN;
Chris@82 572 T7X = T7N + T7O;
Chris@82 573 T7Y = T7I + T7J;
Chris@82 574 T7Z = T7X - T7Y;
Chris@82 575 T80 = T7Y + T7X;
Chris@82 576 }
Chris@82 577 {
Chris@82 578 E T82, T88, T1f, T1G;
Chris@82 579 T82 = T6G + T6F;
Chris@82 580 T88 = T83 + T87;
Chris@82 581 T89 = T82 + T88;
Chris@82 582 T8e = T88 - T82;
Chris@82 583 T1f = T11 + T1e;
Chris@82 584 T1G = T1s + T1F;
Chris@82 585 T1H = T1f + T1G;
Chris@82 586 T8d = T1f - T1G;
Chris@82 587 }
Chris@82 588 {
Chris@82 589 E T2B, T3u, T7H, T7K;
Chris@82 590 T2B = T29 + T2A;
Chris@82 591 T3u = T32 + T3t;
Chris@82 592 T3v = T2B + T3u;
Chris@82 593 T8b = T3u - T2B;
Chris@82 594 T7H = T29 - T2A;
Chris@82 595 T7K = T7I - T7J;
Chris@82 596 T7L = T7H + T7K;
Chris@82 597 T7T = T7H - T7K;
Chris@82 598 }
Chris@82 599 {
Chris@82 600 E T7M, T7P, T7D, T7E;
Chris@82 601 T7M = T32 - T3t;
Chris@82 602 T7P = T7N - T7O;
Chris@82 603 T7Q = T7M - T7P;
Chris@82 604 T7U = T7M + T7P;
Chris@82 605 T7D = T6P + T6Q;
Chris@82 606 T7E = T6K + T6L;
Chris@82 607 T7F = T7D - T7E;
Chris@82 608 T81 = T7E + T7D;
Chris@82 609 }
Chris@82 610 {
Chris@82 611 E T1I, T8a, T8c, T7W;
Chris@82 612 T1I = TO + T1H;
Chris@82 613 ci[WS(rs, 15)] = T1I - T3v;
Chris@82 614 cr[0] = T1I + T3v;
Chris@82 615 T8a = T81 + T89;
Chris@82 616 cr[WS(rs, 16)] = T80 - T8a;
Chris@82 617 ci[WS(rs, 31)] = T80 + T8a;
Chris@82 618 T8c = T89 - T81;
Chris@82 619 cr[WS(rs, 24)] = T8b - T8c;
Chris@82 620 ci[WS(rs, 23)] = T8b + T8c;
Chris@82 621 T7W = TO - T1H;
Chris@82 622 cr[WS(rs, 8)] = T7W - T7Z;
Chris@82 623 ci[WS(rs, 7)] = T7W + T7Z;
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T7G, T7R, T8f, T8g;
Chris@82 627 T7G = T7C - T7F;
Chris@82 628 T7R = T7L + T7Q;
Chris@82 629 ci[WS(rs, 11)] = FNMS(KP707106781, T7R, T7G);
Chris@82 630 cr[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
Chris@82 631 T8f = T8d + T8e;
Chris@82 632 T8g = T7Q - T7L;
Chris@82 633 cr[WS(rs, 28)] = FMS(KP707106781, T8g, T8f);
Chris@82 634 ci[WS(rs, 19)] = FMA(KP707106781, T8g, T8f);
Chris@82 635 }
Chris@82 636 {
Chris@82 637 E T8h, T8i, T7S, T7V;
Chris@82 638 T8h = T8e - T8d;
Chris@82 639 T8i = T7U - T7T;
Chris@82 640 cr[WS(rs, 20)] = FMS(KP707106781, T8i, T8h);
Chris@82 641 ci[WS(rs, 27)] = FMA(KP707106781, T8i, T8h);
Chris@82 642 T7S = T7C + T7F;
Chris@82 643 T7V = T7T + T7U;
Chris@82 644 cr[WS(rs, 12)] = FNMS(KP707106781, T7V, T7S);
Chris@82 645 ci[WS(rs, 3)] = FMA(KP707106781, T7V, T7S);
Chris@82 646 }
Chris@82 647 }
/* Output group 2: writes cr[3, 7, 11, ..., 31] and ci[0, 4, 8, ..., 28]. */
Chris@82 648 {
Chris@82 649 E T3S, T5C, T4n, T8C, T8B, T8H, T5F, T8I, T5w, T5Q, T5A, T5M, T4X, T5P, T5z;
Chris@82 650 E T5J;
Chris@82 651 {
Chris@82 652 E T3C, T3R, T5D, T5E;
Chris@82 653 T3C = T3w - T3B;
Chris@82 654 T3R = T3J + T3Q;
Chris@82 655 T3S = FNMS(KP707106781, T3R, T3C);
Chris@82 656 T5C = FMA(KP707106781, T3R, T3C);
Chris@82 657 {
Chris@82 658 E T47, T4m, T8z, T8A;
Chris@82 659 T47 = FMA(KP414213562, T46, T3Z);
Chris@82 660 T4m = FNMS(KP414213562, T4l, T4e);
Chris@82 661 T4n = T47 - T4m;
Chris@82 662 T8C = T47 + T4m;
Chris@82 663 T8z = T8x + T8y;
Chris@82 664 T8A = T5T - T5U;
Chris@82 665 T8B = FMA(KP707106781, T8A, T8z);
Chris@82 666 T8H = FNMS(KP707106781, T8A, T8z);
Chris@82 667 }
Chris@82 668 T5D = FNMS(KP414213562, T3Z, T46);
Chris@82 669 T5E = FMA(KP414213562, T4e, T4l);
Chris@82 670 T5F = T5D + T5E;
Chris@82 671 T8I = T5E - T5D;
Chris@82 672 {
Chris@82 673 E T5k, T5K, T5v, T5L, T5j, T5u;
Chris@82 674 T5j = T5b + T5i;
Chris@82 675 T5k = FNMS(KP707106781, T5j, T54);
Chris@82 676 T5K = FMA(KP707106781, T5j, T54);
Chris@82 677 T5u = T5s - T5t;
Chris@82 678 T5v = FNMS(KP707106781, T5u, T5r);
Chris@82 679 T5L = FMA(KP707106781, T5u, T5r);
Chris@82 680 T5w = FMA(KP668178637, T5v, T5k);
Chris@82 681 T5Q = FMA(KP198912367, T5K, T5L);
Chris@82 682 T5A = FNMS(KP668178637, T5k, T5v);
Chris@82 683 T5M = FNMS(KP198912367, T5L, T5K);
Chris@82 684 }
Chris@82 685 {
Chris@82 686 E T4L, T5H, T4W, T5I, T4K, T4V;
Chris@82 687 T4K = T4C + T4J;
Chris@82 688 T4L = FNMS(KP707106781, T4K, T4v);
Chris@82 689 T5H = FMA(KP707106781, T4K, T4v);
Chris@82 690 T4V = T4T + T4U;
Chris@82 691 T4W = FNMS(KP707106781, T4V, T4S);
Chris@82 692 T5I = FMA(KP707106781, T4V, T4S);
Chris@82 693 T4X = FMA(KP668178637, T4W, T4L);
Chris@82 694 T5P = FMA(KP198912367, T5H, T5I);
Chris@82 695 T5z = FNMS(KP668178637, T4L, T4W);
Chris@82 696 T5J = FNMS(KP198912367, T5I, T5H);
Chris@82 697 }
Chris@82 698 }
Chris@82 699 {
Chris@82 700 E T4o, T5x, T8J, T8K;
Chris@82 701 T4o = FMA(KP923879532, T4n, T3S);
Chris@82 702 T5x = T4X + T5w;
Chris@82 703 ci[WS(rs, 12)] = FNMS(KP831469612, T5x, T4o);
Chris@82 704 cr[WS(rs, 3)] = FMA(KP831469612, T5x, T4o);
Chris@82 705 T8J = FMA(KP923879532, T8I, T8H);
Chris@82 706 T8K = T5z - T5A;
Chris@82 707 cr[WS(rs, 19)] = FMS(KP831469612, T8K, T8J);
Chris@82 708 ci[WS(rs, 28)] = FMA(KP831469612, T8K, T8J);
Chris@82 709 }
Chris@82 710 {
Chris@82 711 E T8L, T8M, T5y, T5B;
Chris@82 712 T8L = FNMS(KP923879532, T8I, T8H);
Chris@82 713 T8M = T5w - T4X;
Chris@82 714 cr[WS(rs, 27)] = FMS(KP831469612, T8M, T8L);
Chris@82 715 ci[WS(rs, 20)] = FMA(KP831469612, T8M, T8L);
Chris@82 716 T5y = FNMS(KP923879532, T4n, T3S);
Chris@82 717 T5B = T5z + T5A;
Chris@82 718 cr[WS(rs, 11)] = FMA(KP831469612, T5B, T5y);
Chris@82 719 ci[WS(rs, 4)] = FNMS(KP831469612, T5B, T5y);
Chris@82 720 }
Chris@82 721 {
Chris@82 722 E T5G, T5N, T8D, T8E;
Chris@82 723 T5G = FMA(KP923879532, T5F, T5C);
Chris@82 724 T5N = T5J + T5M;
Chris@82 725 cr[WS(rs, 15)] = FNMS(KP980785280, T5N, T5G);
Chris@82 726 ci[0] = FMA(KP980785280, T5N, T5G);
Chris@82 727 T8D = FMA(KP923879532, T8C, T8B);
Chris@82 728 T8E = T5Q - T5P;
Chris@82 729 cr[WS(rs, 31)] = FMS(KP980785280, T8E, T8D);
Chris@82 730 ci[WS(rs, 16)] = FMA(KP980785280, T8E, T8D);
Chris@82 731 }
Chris@82 732 {
Chris@82 733 E T8F, T8G, T5O, T5R;
Chris@82 734 T8F = FNMS(KP923879532, T8C, T8B);
Chris@82 735 T8G = T5M - T5J;
Chris@82 736 cr[WS(rs, 23)] = FMS(KP980785280, T8G, T8F);
Chris@82 737 ci[WS(rs, 24)] = FMA(KP980785280, T8G, T8F);
Chris@82 738 T5O = FNMS(KP923879532, T5F, T5C);
Chris@82 739 T5R = T5P + T5Q;
Chris@82 740 ci[WS(rs, 8)] = FNMS(KP980785280, T5R, T5O);
Chris@82 741 cr[WS(rs, 7)] = FMA(KP980785280, T5R, T5O);
Chris@82 742 }
Chris@82 743 }
/* Output group 3: writes cr[2, 6, 10, ..., 30] and ci[1, 5, 9, ..., 29]. */
Chris@82 744 {
Chris@82 745 E T6I, T7m, T7w, T7A, T8l, T8r, T6T, T8m, T75, T7k, T7p, T8s, T7t, T7z, T7g;
Chris@82 746 E T7j;
Chris@82 747 {
Chris@82 748 E T6E, T6H, T7u, T7v;
Chris@82 749 T6E = T8 - Tl;
Chris@82 750 T6H = T6F - T6G;
Chris@82 751 T6I = T6E - T6H;
Chris@82 752 T7m = T6E + T6H;
Chris@82 753 T7u = T76 + T79;
Chris@82 754 T7v = T7e - T7d;
Chris@82 755 T7w = FNMS(KP414213562, T7v, T7u);
Chris@82 756 T7A = FMA(KP414213562, T7u, T7v);
Chris@82 757 }
Chris@82 758 {
Chris@82 759 E T8j, T8k, T6N, T6S;
Chris@82 760 T8j = Tz - TM;
Chris@82 761 T8k = T87 - T83;
Chris@82 762 T8l = T8j + T8k;
Chris@82 763 T8r = T8k - T8j;
Chris@82 764 T6N = T6J + T6M;
Chris@82 765 T6S = T6O - T6R;
Chris@82 766 T6T = T6N + T6S;
Chris@82 767 T8m = T6N - T6S;
Chris@82 768 }
Chris@82 769 {
Chris@82 770 E T6Z, T74, T7n, T7o;
Chris@82 771 T6Z = T6V - T6Y;
Chris@82 772 T74 = T72 - T73;
Chris@82 773 T75 = FMA(KP414213562, T74, T6Z);
Chris@82 774 T7k = FNMS(KP414213562, T6Z, T74);
Chris@82 775 T7n = T6J - T6M;
Chris@82 776 T7o = T6O + T6R;
Chris@82 777 T7p = T7n + T7o;
Chris@82 778 T8s = T7o - T7n;
Chris@82 779 }
Chris@82 780 {
Chris@82 781 E T7r, T7s, T7a, T7f;
Chris@82 782 T7r = T6V + T6Y;
Chris@82 783 T7s = T72 + T73;
Chris@82 784 T7t = FNMS(KP414213562, T7s, T7r);
Chris@82 785 T7z = FMA(KP414213562, T7r, T7s);
Chris@82 786 T7a = T76 - T79;
Chris@82 787 T7f = T7d + T7e;
Chris@82 788 T7g = FNMS(KP414213562, T7f, T7a);
Chris@82 789 T7j = FMA(KP414213562, T7a, T7f);
Chris@82 790 }
Chris@82 791 {
Chris@82 792 E T6U, T7h, T8t, T8u;
Chris@82 793 T6U = FMA(KP707106781, T6T, T6I);
Chris@82 794 T7h = T75 + T7g;
Chris@82 795 ci[WS(rs, 13)] = FNMS(KP923879532, T7h, T6U);
Chris@82 796 cr[WS(rs, 2)] = FMA(KP923879532, T7h, T6U);
Chris@82 797 T8t = FMA(KP707106781, T8s, T8r);
Chris@82 798 T8u = T7k + T7j;
Chris@82 799 cr[WS(rs, 18)] = FMS(KP923879532, T8u, T8t);
Chris@82 800 ci[WS(rs, 29)] = FMA(KP923879532, T8u, T8t);
Chris@82 801 }
Chris@82 802 {
Chris@82 803 E T8v, T8w, T7i, T7l;
Chris@82 804 T8v = FNMS(KP707106781, T8s, T8r);
Chris@82 805 T8w = T7g - T75;
Chris@82 806 cr[WS(rs, 26)] = FMS(KP923879532, T8w, T8v);
Chris@82 807 ci[WS(rs, 21)] = FMA(KP923879532, T8w, T8v);
Chris@82 808 T7i = FNMS(KP707106781, T6T, T6I);
Chris@82 809 T7l = T7j - T7k;
Chris@82 810 cr[WS(rs, 10)] = FNMS(KP923879532, T7l, T7i);
Chris@82 811 ci[WS(rs, 5)] = FMA(KP923879532, T7l, T7i);
Chris@82 812 }
Chris@82 813 {
Chris@82 814 E T7q, T7x, T8n, T8o;
Chris@82 815 T7q = FMA(KP707106781, T7p, T7m);
Chris@82 816 T7x = T7t + T7w;
Chris@82 817 cr[WS(rs, 14)] = FNMS(KP923879532, T7x, T7q);
Chris@82 818 ci[WS(rs, 1)] = FMA(KP923879532, T7x, T7q);
Chris@82 819 T8n = FMA(KP707106781, T8m, T8l);
Chris@82 820 T8o = T7A - T7z;
Chris@82 821 cr[WS(rs, 30)] = FMS(KP923879532, T8o, T8n);
Chris@82 822 ci[WS(rs, 17)] = FMA(KP923879532, T8o, T8n);
Chris@82 823 }
Chris@82 824 {
Chris@82 825 E T8p, T8q, T7y, T7B;
Chris@82 826 T8p = FNMS(KP707106781, T8m, T8l);
Chris@82 827 T8q = T7w - T7t;
Chris@82 828 cr[WS(rs, 22)] = FMS(KP923879532, T8q, T8p);
Chris@82 829 ci[WS(rs, 25)] = FMA(KP923879532, T8q, T8p);
Chris@82 830 T7y = FNMS(KP707106781, T7p, T7m);
Chris@82 831 T7B = T7z + T7A;
Chris@82 832 ci[WS(rs, 9)] = FNMS(KP923879532, T7B, T7y);
Chris@82 833 cr[WS(rs, 6)] = FMA(KP923879532, T7B, T7y);
Chris@82 834 }
Chris@82 835 }
/* Output group 4: writes cr[1, 5, 9, ..., 29] and ci[2, 6, 10, ..., 30]. */
Chris@82 836 {
Chris@82 837 E T5W, T6o, T63, T8W, T8P, T8V, T6r, T8Q, T6i, T6C, T6l, T6y, T6b, T6B, T6m;
Chris@82 838 E T6v;
Chris@82 839 {
Chris@82 840 E T5S, T5V, T6p, T6q;
Chris@82 841 T5S = T3w + T3B;
Chris@82 842 T5V = T5T + T5U;
Chris@82 843 T5W = FMA(KP707106781, T5V, T5S);
Chris@82 844 T6o = FNMS(KP707106781, T5V, T5S);
Chris@82 845 {
Chris@82 846 E T5Z, T62, T8N, T8O;
Chris@82 847 T5Z = FMA(KP414213562, T5Y, T5X);
Chris@82 848 T62 = FNMS(KP414213562, T61, T60);
Chris@82 849 T63 = T5Z + T62;
Chris@82 850 T8W = T5Z - T62;
Chris@82 851 T8N = T8y - T8x;
Chris@82 852 T8O = T3Q - T3J;
Chris@82 853 T8P = FMA(KP707106781, T8O, T8N);
Chris@82 854 T8V = FNMS(KP707106781, T8O, T8N);
Chris@82 855 }
Chris@82 856 T6p = FMA(KP414213562, T60, T61);
Chris@82 857 T6q = FNMS(KP414213562, T5X, T5Y);
Chris@82 858 T6r = T6p - T6q;
Chris@82 859 T8Q = T6q + T6p;
Chris@82 860 {
Chris@82 861 E T6e, T6w, T6h, T6x, T6d, T6g;
Chris@82 862 T6d = T5t + T5s;
Chris@82 863 T6e = FMA(KP707106781, T6d, T6c);
Chris@82 864 T6w = FNMS(KP707106781, T6d, T6c);
Chris@82 865 T6g = T5i - T5b;
Chris@82 866 T6h = FMA(KP707106781, T6g, T6f);
Chris@82 867 T6x = FNMS(KP707106781, T6g, T6f);
Chris@82 868 T6i = FNMS(KP198912367, T6h, T6e);
Chris@82 869 T6C = FNMS(KP668178637, T6w, T6x);
Chris@82 870 T6l = FMA(KP198912367, T6e, T6h);
Chris@82 871 T6y = FMA(KP668178637, T6x, T6w);
Chris@82 872 }
Chris@82 873 {
Chris@82 874 E T67, T6t, T6a, T6u, T66, T69;
Chris@82 875 T66 = T4T - T4U;
Chris@82 876 T67 = FMA(KP707106781, T66, T65);
Chris@82 877 T6t = FNMS(KP707106781, T66, T65);
Chris@82 878 T69 = T4J - T4C;
Chris@82 879 T6a = FMA(KP707106781, T69, T68);
Chris@82 880 T6u = FNMS(KP707106781, T69, T68);
Chris@82 881 T6b = FMA(KP198912367, T6a, T67);
Chris@82 882 T6B = FMA(KP668178637, T6t, T6u);
Chris@82 883 T6m = FNMS(KP198912367, T67, T6a);
Chris@82 884 T6v = FNMS(KP668178637, T6u, T6t);
Chris@82 885 }
Chris@82 886 }
Chris@82 887 {
Chris@82 888 E T64, T6j, T8X, T8Y;
Chris@82 889 T64 = FMA(KP923879532, T63, T5W);
Chris@82 890 T6j = T6b + T6i;
Chris@82 891 ci[WS(rs, 14)] = FNMS(KP980785280, T6j, T64);
Chris@82 892 cr[WS(rs, 1)] = FMA(KP980785280, T6j, T64);
Chris@82 893 T8X = FMA(KP923879532, T8W, T8V);
Chris@82 894 T8Y = T6B + T6C;
Chris@82 895 cr[WS(rs, 29)] = -(FMA(KP831469612, T8Y, T8X));
Chris@82 896 ci[WS(rs, 18)] = FNMS(KP831469612, T8Y, T8X);
Chris@82 897 }
Chris@82 898 {
Chris@82 899 E T8Z, T90, T6k, T6n;
Chris@82 900 T8Z = FNMS(KP923879532, T8W, T8V);
Chris@82 901 T90 = T6y - T6v;
Chris@82 902 cr[WS(rs, 21)] = FMS(KP831469612, T90, T8Z);
Chris@82 903 ci[WS(rs, 26)] = FMA(KP831469612, T90, T8Z);
Chris@82 904 T6k = FNMS(KP923879532, T63, T5W);
Chris@82 905 T6n = T6l - T6m;
Chris@82 906 cr[WS(rs, 9)] = FNMS(KP980785280, T6n, T6k);
Chris@82 907 ci[WS(rs, 6)] = FMA(KP980785280, T6n, T6k);
Chris@82 908 }
Chris@82 909 {
Chris@82 910 E T6s, T6z, T8R, T8S;
Chris@82 911 T6s = FMA(KP923879532, T6r, T6o);
Chris@82 912 T6z = T6v + T6y;
Chris@82 913 cr[WS(rs, 13)] = FNMS(KP831469612, T6z, T6s);
Chris@82 914 ci[WS(rs, 2)] = FMA(KP831469612, T6z, T6s);
Chris@82 915 T8R = FMA(KP923879532, T8Q, T8P);
Chris@82 916 T8S = T6m + T6l;
Chris@82 917 cr[WS(rs, 17)] = FMS(KP980785280, T8S, T8R);
Chris@82 918 ci[WS(rs, 30)] = FMA(KP980785280, T8S, T8R);
Chris@82 919 }
Chris@82 920 {
Chris@82 921 E T8T, T8U, T6A, T6D;
Chris@82 922 T8T = FNMS(KP923879532, T8Q, T8P);
Chris@82 923 T8U = T6i - T6b;
Chris@82 924 cr[WS(rs, 25)] = FMS(KP980785280, T8U, T8T);
Chris@82 925 ci[WS(rs, 22)] = FMA(KP980785280, T8U, T8T);
Chris@82 926 T6A = FNMS(KP923879532, T6r, T6o);
Chris@82 927 T6D = T6B - T6C;
Chris@82 928 ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A);
Chris@82 929 cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A);
Chris@82 930 }
Chris@82 931 }
Chris@82 932 }
Chris@82 933 }
Chris@82 934 }
Chris@82 935
/*
 * Twiddle-instruction table referenced by the hc2hc_desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in project headers
 * not visible here.  Presumably {TW_FULL, 1, 32} requests the full twiddle set
 * for size 32 (hf_32 consumes 62 W values per iteration) and TW_NEXT ends the
 * list -- confirm against the tw_instr definition.
 */
Chris@82 936 static const tw_instr twinstr[] = {
Chris@82 937 {TW_FULL, 1, 32},
Chris@82 938 {TW_NEXT, 1, 0}
Chris@82 939 };
Chris@82 940
/*
 * Descriptor passed to the planner together with hf_32: size 32, the name
 * string "hf_32", the twiddle table above and &GENUS.  The trailing
 * {236, 62, 198, 0} repeats the operation counts from this variant's header
 * comment (236 additions, 62 multiplications, 198 fused multiply/add).
 * NOTE(review): field names and the meaning of the fourth count come from the
 * hc2hc_desc declaration, which is not visible here.
 */
Chris@82 941 static const hc2hc_desc desc = { 32, "hf_32", twinstr, &GENUS, {236, 62, 198, 0} };
Chris@82 942
Chris@82 943 void X(codelet_hf_32) (planner *p) { /* registration entry point for the FMA-variant codelet; X() is a project macro (presumably a name-prefixing macro -- confirm) */
Chris@82 944 X(khc2hc_register) (p, hf_32, &desc); /* passes planner p, the hf_32 kernel and its descriptor to the registration routine */
Chris@82 945 }
Chris@82 946 #else
Chris@82 947
Chris@82 948 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include rdft/scalar/hf.h */
Chris@82 949
Chris@82 950 /*
Chris@82 951 * This function contains 434 FP additions, 208 FP multiplications,
Chris@82 952 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@82 953 * 96 stack variables, 7 constants, and 128 memory accesses
Chris@82 954 */
Chris@82 955 #include "rdft/scalar/hf.h"
Chris@82 956
Chris@82 957 static void hf_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* generated size-32 kernel (gen_hc2hc -n 32 -dit, non-FMA variant): for each m in [mb, me) it reads the 32 pairs cr[WS(rs, k)], ci[WS(rs, k)], combines pairs 1..31 with values from W, and overwrites all 32 cr and 32 ci slots in place */
Chris@82 958 { /* cr, ci: in/out arrays indexed through WS(rs, k), k = 0..31; W: read-only table, 62 values consumed per iteration; mb, me: half-open iteration range; ms: per-iteration pointer step (cr moves by +ms, ci by -ms) */
Chris@82 959 DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
Chris@82 960 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
Chris@82 961 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
Chris@82 962 DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
Chris@82 963 DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
Chris@82 964 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
Chris@82 965 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(1/2) = cos(pi/4) */
Chris@82 966 {
Chris@82 967 INT m;
Chris@82 968 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { /* W starts at offset (mb - 1) * 62 and advances 62 values per step; cr and ci walk in opposite directions; MAKE_VOLATILE_STRIDE is a project macro defined elsewhere */
Chris@82 969 E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41; /* partial results that survive from the eight input groups below into the output groups */
Chris@82 970 E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U;
Chris@82 971 E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x;
Chris@82 972 E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P;
Chris@82 973 E T4m, T5e, T4v, T5h;
Chris@82 974 { /* input group: slot 0 (used as-is) and slots 16, 8, 24; slot k is paired with W[2k-2], W[2k-1] */
Chris@82 975 E T1, T76, T6, T75, Tc, T32, Th, T33;
Chris@82 976 T1 = cr[0];
Chris@82 977 T76 = ci[0];
Chris@82 978 {
Chris@82 979 E T3, T5, T2, T4;
Chris@82 980 T3 = cr[WS(rs, 16)];
Chris@82 981 T5 = ci[WS(rs, 16)];
Chris@82 982 T2 = W[30];
Chris@82 983 T4 = W[31];
Chris@82 984 T6 = FMA(T2, T3, T4 * T5); /* assuming FMA(a, b, c) = a*b + c (macro defined elsewhere -- confirm): W[30]*cr + W[31]*ci */
Chris@82 985 T75 = FNMS(T4, T3, T2 * T5); /* assuming FNMS(a, b, c) = c - a*b (macro defined elsewhere -- confirm): W[30]*ci - W[31]*cr */
Chris@82 986 }
Chris@82 987 {
Chris@82 988 E T9, Tb, T8, Ta;
Chris@82 989 T9 = cr[WS(rs, 8)];
Chris@82 990 Tb = ci[WS(rs, 8)];
Chris@82 991 T8 = W[14];
Chris@82 992 Ta = W[15];
Chris@82 993 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 994 T32 = FNMS(Ta, T9, T8 * Tb);
Chris@82 995 }
Chris@82 996 {
Chris@82 997 E Te, Tg, Td, Tf;
Chris@82 998 Te = cr[WS(rs, 24)];
Chris@82 999 Tg = ci[WS(rs, 24)];
Chris@82 1000 Td = W[46];
Chris@82 1001 Tf = W[47];
Chris@82 1002 Th = FMA(Td, Te, Tf * Tg);
Chris@82 1003 T33 = FNMS(Tf, Te, Td * Tg);
Chris@82 1004 }
Chris@82 1005 {
Chris@82 1006 E T7, Ti, T7A, T7B;
Chris@82 1007 T7 = T1 + T6;
Chris@82 1008 Ti = Tc + Th;
Chris@82 1009 Tj = T7 + Ti;
Chris@82 1010 T5F = T7 - Ti;
Chris@82 1011 T7A = Tc - Th;
Chris@82 1012 T7B = T76 - T75;
Chris@82 1013 T7C = T7A + T7B;
Chris@82 1014 T7Q = T7B - T7A;
Chris@82 1015 }
Chris@82 1016 {
Chris@82 1017 E T31, T34, T74, T77;
Chris@82 1018 T31 = T1 - T6;
Chris@82 1019 T34 = T32 - T33;
Chris@82 1020 T35 = T31 + T34;
Chris@82 1021 T4T = T31 - T34;
Chris@82 1022 T74 = T32 + T33;
Chris@82 1023 T77 = T75 + T76;
Chris@82 1024 T78 = T74 + T77;
Chris@82 1025 T7m = T77 - T74;
Chris@82 1026 }
Chris@82 1027 }
Chris@82 1028 { /* input group: slots 1, 25, 17, 9 */
Chris@82 1029 E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H;
Chris@82 1030 {
Chris@82 1031 E T1v, T1x, T1u, T1w;
Chris@82 1032 T1v = cr[WS(rs, 1)];
Chris@82 1033 T1x = ci[WS(rs, 1)];
Chris@82 1034 T1u = W[0];
Chris@82 1035 T1w = W[1];
Chris@82 1036 T1y = FMA(T1u, T1v, T1w * T1x);
Chris@82 1037 T3X = FNMS(T1w, T1v, T1u * T1x);
Chris@82 1038 }
Chris@82 1039 {
Chris@82 1040 E T1L, T1N, T1K, T1M;
Chris@82 1041 T1L = cr[WS(rs, 25)];
Chris@82 1042 T1N = ci[WS(rs, 25)];
Chris@82 1043 T1K = W[48];
Chris@82 1044 T1M = W[49];
Chris@82 1045 T1O = FMA(T1K, T1L, T1M * T1N);
Chris@82 1046 T3I = FNMS(T1M, T1L, T1K * T1N);
Chris@82 1047 }
Chris@82 1048 {
Chris@82 1049 E T1A, T1C, T1z, T1B;
Chris@82 1050 T1A = cr[WS(rs, 17)];
Chris@82 1051 T1C = ci[WS(rs, 17)];
Chris@82 1052 T1z = W[32];
Chris@82 1053 T1B = W[33];
Chris@82 1054 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@82 1055 T3Y = FNMS(T1B, T1A, T1z * T1C);
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T1G, T1I, T1F, T1H;
Chris@82 1059 T1G = cr[WS(rs, 9)];
Chris@82 1060 T1I = ci[WS(rs, 9)];
Chris@82 1061 T1F = W[16];
Chris@82 1062 T1H = W[17];
Chris@82 1063 T1J = FMA(T1F, T1G, T1H * T1I);
Chris@82 1064 T3H = FNMS(T1H, T1G, T1F * T1I);
Chris@82 1065 }
Chris@82 1066 {
Chris@82 1067 E T1E, T1P, T5W, T5X;
Chris@82 1068 T1E = T1y + T1D;
Chris@82 1069 T1P = T1J + T1O;
Chris@82 1070 T1Q = T1E + T1P;
Chris@82 1071 T61 = T1E - T1P;
Chris@82 1072 T5W = T3X + T3Y;
Chris@82 1073 T5X = T3H + T3I;
Chris@82 1074 T5Y = T5W - T5X;
Chris@82 1075 T6J = T5W + T5X;
Chris@82 1076 }
Chris@82 1077 {
Chris@82 1078 E T3G, T3J, T3Z, T40;
Chris@82 1079 T3G = T1y - T1D;
Chris@82 1080 T3J = T3H - T3I;
Chris@82 1081 T3K = T3G + T3J;
Chris@82 1082 T56 = T3G - T3J;
Chris@82 1083 T3Z = T3X - T3Y;
Chris@82 1084 T40 = T1J - T1O;
Chris@82 1085 T41 = T3Z - T40;
Chris@82 1086 T59 = T3Z + T40;
Chris@82 1087 }
Chris@82 1088 }
Chris@82 1089 { /* input group: slots 31, 23, 15, 7 */
Chris@82 1090 E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p;
Chris@82 1091 {
Chris@82 1092 E T2g, T2i, T2f, T2h;
Chris@82 1093 T2g = cr[WS(rs, 31)];
Chris@82 1094 T2i = ci[WS(rs, 31)];
Chris@82 1095 T2f = W[60];
Chris@82 1096 T2h = W[61];
Chris@82 1097 T2j = FMA(T2f, T2g, T2h * T2i);
Chris@82 1098 T47 = FNMS(T2h, T2g, T2f * T2i);
Chris@82 1099 }
Chris@82 1100 {
Chris@82 1101 E T2w, T2y, T2v, T2x;
Chris@82 1102 T2w = cr[WS(rs, 23)];
Chris@82 1103 T2y = ci[WS(rs, 23)];
Chris@82 1104 T2v = W[44];
Chris@82 1105 T2x = W[45];
Chris@82 1106 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@82 1107 T4q = FNMS(T2x, T2w, T2v * T2y);
Chris@82 1108 }
Chris@82 1109 {
Chris@82 1110 E T2l, T2n, T2k, T2m;
Chris@82 1111 T2l = cr[WS(rs, 15)];
Chris@82 1112 T2n = ci[WS(rs, 15)];
Chris@82 1113 T2k = W[28];
Chris@82 1114 T2m = W[29];
Chris@82 1115 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@82 1116 T48 = FNMS(T2m, T2l, T2k * T2n);
Chris@82 1117 }
Chris@82 1118 {
Chris@82 1119 E T2r, T2t, T2q, T2s;
Chris@82 1120 T2r = cr[WS(rs, 7)];
Chris@82 1121 T2t = ci[WS(rs, 7)];
Chris@82 1122 T2q = W[12];
Chris@82 1123 T2s = W[13];
Chris@82 1124 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@82 1125 T4p = FNMS(T2s, T2r, T2q * T2t);
Chris@82 1126 }
Chris@82 1127 {
Chris@82 1128 E T2p, T2A, T6c, T6d;
Chris@82 1129 T2p = T2j + T2o;
Chris@82 1130 T2A = T2u + T2z;
Chris@82 1131 T2B = T2p + T2A;
Chris@82 1132 T67 = T2p - T2A;
Chris@82 1133 T6c = T47 + T48;
Chris@82 1134 T6d = T4p + T4q;
Chris@82 1135 T6e = T6c - T6d;
Chris@82 1136 T6O = T6c + T6d;
Chris@82 1137 }
Chris@82 1138 {
Chris@82 1139 E T49, T4a, T4o, T4r;
Chris@82 1140 T49 = T47 - T48;
Chris@82 1141 T4a = T2u - T2z;
Chris@82 1142 T4b = T49 - T4a;
Chris@82 1143 T5g = T49 + T4a;
Chris@82 1144 T4o = T2j - T2o;
Chris@82 1145 T4r = T4p - T4q;
Chris@82 1146 T4s = T4o + T4r;
Chris@82 1147 T5d = T4o - T4r;
Chris@82 1148 }
Chris@82 1149 }
Chris@82 1150 { /* input group: slots 4, 12, 20, 28 */
Chris@82 1151 E To, T37, TE, T3d, Tt, T38, Tz, T3c;
Chris@82 1152 {
Chris@82 1153 E Tl, Tn, Tk, Tm;
Chris@82 1154 Tl = cr[WS(rs, 4)];
Chris@82 1155 Tn = ci[WS(rs, 4)];
Chris@82 1156 Tk = W[6];
Chris@82 1157 Tm = W[7];
Chris@82 1158 To = FMA(Tk, Tl, Tm * Tn);
Chris@82 1159 T37 = FNMS(Tm, Tl, Tk * Tn);
Chris@82 1160 }
Chris@82 1161 {
Chris@82 1162 E TB, TD, TA, TC;
Chris@82 1163 TB = cr[WS(rs, 12)];
Chris@82 1164 TD = ci[WS(rs, 12)];
Chris@82 1165 TA = W[22];
Chris@82 1166 TC = W[23];
Chris@82 1167 TE = FMA(TA, TB, TC * TD);
Chris@82 1168 T3d = FNMS(TC, TB, TA * TD);
Chris@82 1169 }
Chris@82 1170 {
Chris@82 1171 E Tq, Ts, Tp, Tr;
Chris@82 1172 Tq = cr[WS(rs, 20)];
Chris@82 1173 Ts = ci[WS(rs, 20)];
Chris@82 1174 Tp = W[38];
Chris@82 1175 Tr = W[39];
Chris@82 1176 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 1177 T38 = FNMS(Tr, Tq, Tp * Ts);
Chris@82 1178 }
Chris@82 1179 {
Chris@82 1180 E Tw, Ty, Tv, Tx;
Chris@82 1181 Tw = cr[WS(rs, 28)];
Chris@82 1182 Ty = ci[WS(rs, 28)];
Chris@82 1183 Tv = W[54];
Chris@82 1184 Tx = W[55];
Chris@82 1185 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 1186 T3c = FNMS(Tx, Tw, Tv * Ty);
Chris@82 1187 }
Chris@82 1188 {
Chris@82 1189 E Tu, TF, T5G, T5H;
Chris@82 1190 Tu = To + Tt;
Chris@82 1191 TF = Tz + TE;
Chris@82 1192 TG = Tu + TF;
Chris@82 1193 T7l = Tu - TF;
Chris@82 1194 T5G = T3c + T3d;
Chris@82 1195 T5H = T37 + T38;
Chris@82 1196 T5I = T5G - T5H;
Chris@82 1197 T73 = T5H + T5G;
Chris@82 1198 }
Chris@82 1199 {
Chris@82 1200 E T36, T39, T3b, T3e;
Chris@82 1201 T36 = To - Tt;
Chris@82 1202 T39 = T37 - T38;
Chris@82 1203 T3a = T36 + T39;
Chris@82 1204 T4U = T36 - T39;
Chris@82 1205 T3b = Tz - TE;
Chris@82 1206 T3e = T3c - T3d;
Chris@82 1207 T3f = T3b - T3e;
Chris@82 1208 T4V = T3b + T3e;
Chris@82 1209 }
Chris@82 1210 }
Chris@82 1211 { /* input group: slots 2, 26, 18, 10 */
Chris@82 1212 E TM, T3n, T12, T3k, TR, T3o, TX, T3j;
Chris@82 1213 {
Chris@82 1214 E TJ, TL, TI, TK;
Chris@82 1215 TJ = cr[WS(rs, 2)];
Chris@82 1216 TL = ci[WS(rs, 2)];
Chris@82 1217 TI = W[2];
Chris@82 1218 TK = W[3];
Chris@82 1219 TM = FMA(TI, TJ, TK * TL);
Chris@82 1220 T3n = FNMS(TK, TJ, TI * TL);
Chris@82 1221 }
Chris@82 1222 {
Chris@82 1223 E TZ, T11, TY, T10;
Chris@82 1224 TZ = cr[WS(rs, 26)];
Chris@82 1225 T11 = ci[WS(rs, 26)];
Chris@82 1226 TY = W[50];
Chris@82 1227 T10 = W[51];
Chris@82 1228 T12 = FMA(TY, TZ, T10 * T11);
Chris@82 1229 T3k = FNMS(T10, TZ, TY * T11);
Chris@82 1230 }
Chris@82 1231 {
Chris@82 1232 E TO, TQ, TN, TP;
Chris@82 1233 TO = cr[WS(rs, 18)];
Chris@82 1234 TQ = ci[WS(rs, 18)];
Chris@82 1235 TN = W[34];
Chris@82 1236 TP = W[35];
Chris@82 1237 TR = FMA(TN, TO, TP * TQ);
Chris@82 1238 T3o = FNMS(TP, TO, TN * TQ);
Chris@82 1239 }
Chris@82 1240 {
Chris@82 1241 E TU, TW, TT, TV;
Chris@82 1242 TU = cr[WS(rs, 10)];
Chris@82 1243 TW = ci[WS(rs, 10)];
Chris@82 1244 TT = W[18];
Chris@82 1245 TV = W[19];
Chris@82 1246 TX = FMA(TT, TU, TV * TW);
Chris@82 1247 T3j = FNMS(TV, TU, TT * TW);
Chris@82 1248 }
Chris@82 1249 {
Chris@82 1250 E TS, T13, T5L, T5M;
Chris@82 1251 TS = TM + TR;
Chris@82 1252 T13 = TX + T12;
Chris@82 1253 T14 = TS + T13;
Chris@82 1254 T5K = TS - T13;
Chris@82 1255 T5L = T3n + T3o;
Chris@82 1256 T5M = T3j + T3k;
Chris@82 1257 T5N = T5L - T5M;
Chris@82 1258 T6F = T5L + T5M;
Chris@82 1259 }
Chris@82 1260 {
Chris@82 1261 E T3i, T3l, T3p, T3q;
Chris@82 1262 T3i = TM - TR;
Chris@82 1263 T3l = T3j - T3k;
Chris@82 1264 T3m = T3i + T3l;
Chris@82 1265 T4Z = T3i - T3l;
Chris@82 1266 T3p = T3n - T3o;
Chris@82 1267 T3q = TX - T12;
Chris@82 1268 T3r = T3p - T3q;
Chris@82 1269 T4Y = T3p + T3q;
Chris@82 1270 }
Chris@82 1271 }
Chris@82 1272 { /* input group: slots 30, 22, 14, 6 */
Chris@82 1273 E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
Chris@82 1274 {
Chris@82 1275 E T16, T18, T15, T17;
Chris@82 1276 T16 = cr[WS(rs, 30)];
Chris@82 1277 T18 = ci[WS(rs, 30)];
Chris@82 1278 T15 = W[58];
Chris@82 1279 T17 = W[59];
Chris@82 1280 T19 = FMA(T15, T16, T17 * T18);
Chris@82 1281 T3t = FNMS(T17, T16, T15 * T18);
Chris@82 1282 }
Chris@82 1283 {
Chris@82 1284 E T1m, T1o, T1l, T1n;
Chris@82 1285 T1m = cr[WS(rs, 22)];
Chris@82 1286 T1o = ci[WS(rs, 22)];
Chris@82 1287 T1l = W[42];
Chris@82 1288 T1n = W[43];
Chris@82 1289 T1p = FMA(T1l, T1m, T1n * T1o);
Chris@82 1290 T3A = FNMS(T1n, T1m, T1l * T1o);
Chris@82 1291 }
Chris@82 1292 {
Chris@82 1293 E T1b, T1d, T1a, T1c;
Chris@82 1294 T1b = cr[WS(rs, 14)];
Chris@82 1295 T1d = ci[WS(rs, 14)];
Chris@82 1296 T1a = W[26];
Chris@82 1297 T1c = W[27];
Chris@82 1298 T1e = FMA(T1a, T1b, T1c * T1d);
Chris@82 1299 T3u = FNMS(T1c, T1b, T1a * T1d);
Chris@82 1300 }
Chris@82 1301 {
Chris@82 1302 E T1h, T1j, T1g, T1i;
Chris@82 1303 T1h = cr[WS(rs, 6)];
Chris@82 1304 T1j = ci[WS(rs, 6)];
Chris@82 1305 T1g = W[10];
Chris@82 1306 T1i = W[11];
Chris@82 1307 T1k = FMA(T1g, T1h, T1i * T1j);
Chris@82 1308 T3z = FNMS(T1i, T1h, T1g * T1j);
Chris@82 1309 }
Chris@82 1310 {
Chris@82 1311 E T1f, T1q, T5Q, T5R;
Chris@82 1312 T1f = T19 + T1e;
Chris@82 1313 T1q = T1k + T1p;
Chris@82 1314 T1r = T1f + T1q;
Chris@82 1315 T5P = T1f - T1q;
Chris@82 1316 T5Q = T3t + T3u;
Chris@82 1317 T5R = T3z + T3A;
Chris@82 1318 T5S = T5Q - T5R;
Chris@82 1319 T6E = T5Q + T5R;
Chris@82 1320 }
Chris@82 1321 {
Chris@82 1322 E T3v, T3w, T3y, T3B;
Chris@82 1323 T3v = T3t - T3u;
Chris@82 1324 T3w = T1k - T1p;
Chris@82 1325 T3x = T3v - T3w;
Chris@82 1326 T52 = T3v + T3w;
Chris@82 1327 T3y = T19 - T1e;
Chris@82 1328 T3B = T3z - T3A;
Chris@82 1329 T3C = T3y + T3B;
Chris@82 1330 T51 = T3y - T3B;
Chris@82 1331 }
Chris@82 1332 }
Chris@82 1333 { /* input group: slots 5, 21, 29, 13; results are pre-scaled by KP707106781 */
Chris@82 1334 E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T;
Chris@82 1335 {
Chris@82 1336 E T1S, T1U, T1R, T1T;
Chris@82 1337 T1S = cr[WS(rs, 5)];
Chris@82 1338 T1U = ci[WS(rs, 5)];
Chris@82 1339 T1R = W[8];
Chris@82 1340 T1T = W[9];
Chris@82 1341 T1V = FMA(T1R, T1S, T1T * T1U);
Chris@82 1342 T3M = FNMS(T1T, T1S, T1R * T1U);
Chris@82 1343 }
Chris@82 1344 {
Chris@82 1345 E T1X, T1Z, T1W, T1Y;
Chris@82 1346 T1X = cr[WS(rs, 21)];
Chris@82 1347 T1Z = ci[WS(rs, 21)];
Chris@82 1348 T1W = W[40];
Chris@82 1349 T1Y = W[41];
Chris@82 1350 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@82 1351 T3N = FNMS(T1Y, T1X, T1W * T1Z);
Chris@82 1352 }
Chris@82 1353 T3L = T1V - T20;
Chris@82 1354 T3O = T3M - T3N;
Chris@82 1355 {
Chris@82 1356 E T23, T25, T22, T24;
Chris@82 1357 T23 = cr[WS(rs, 29)];
Chris@82 1358 T25 = ci[WS(rs, 29)];
Chris@82 1359 T22 = W[56];
Chris@82 1360 T24 = W[57];
Chris@82 1361 T26 = FMA(T22, T23, T24 * T25);
Chris@82 1362 T3Q = FNMS(T24, T23, T22 * T25);
Chris@82 1363 }
Chris@82 1364 {
Chris@82 1365 E T28, T2a, T27, T29;
Chris@82 1366 T28 = cr[WS(rs, 13)];
Chris@82 1367 T2a = ci[WS(rs, 13)];
Chris@82 1368 T27 = W[24];
Chris@82 1369 T29 = W[25];
Chris@82 1370 T2b = FMA(T27, T28, T29 * T2a);
Chris@82 1371 T3R = FNMS(T29, T28, T27 * T2a);
Chris@82 1372 }
Chris@82 1373 T3S = T3Q - T3R;
Chris@82 1374 T3T = T26 - T2b;
Chris@82 1375 {
Chris@82 1376 E T21, T2c, T62, T63;
Chris@82 1377 T21 = T1V + T20;
Chris@82 1378 T2c = T26 + T2b;
Chris@82 1379 T2d = T21 + T2c;
Chris@82 1380 T5Z = T21 - T2c;
Chris@82 1381 T62 = T3Q + T3R;
Chris@82 1382 T63 = T3M + T3N;
Chris@82 1383 T64 = T62 - T63;
Chris@82 1384 T6K = T63 + T62;
Chris@82 1385 }
Chris@82 1386 {
Chris@82 1387 E T3P, T3U, T42, T43;
Chris@82 1388 T3P = T3L + T3O;
Chris@82 1389 T3U = T3S - T3T;
Chris@82 1390 T3V = KP707106781 * (T3P - T3U);
Chris@82 1391 T5a = KP707106781 * (T3P + T3U);
Chris@82 1392 T42 = T3T + T3S;
Chris@82 1393 T43 = T3L - T3O;
Chris@82 1394 T44 = KP707106781 * (T42 - T43);
Chris@82 1395 T57 = KP707106781 * (T43 + T42);
Chris@82 1396 }
Chris@82 1397 }
Chris@82 1398 { /* input group: slots 3, 19, 27, 11; results are pre-scaled by KP707106781 */
Chris@82 1399 E T2G, T4i, T2L, T4j, T4h, T4k, T2R, T4d, T2W, T4e, T4c, T4f;
Chris@82 1400 {
Chris@82 1401 E T2D, T2F, T2C, T2E;
Chris@82 1402 T2D = cr[WS(rs, 3)];
Chris@82 1403 T2F = ci[WS(rs, 3)];
Chris@82 1404 T2C = W[4];
Chris@82 1405 T2E = W[5];
Chris@82 1406 T2G = FMA(T2C, T2D, T2E * T2F);
Chris@82 1407 T4i = FNMS(T2E, T2D, T2C * T2F);
Chris@82 1408 }
Chris@82 1409 {
Chris@82 1410 E T2I, T2K, T2H, T2J;
Chris@82 1411 T2I = cr[WS(rs, 19)];
Chris@82 1412 T2K = ci[WS(rs, 19)];
Chris@82 1413 T2H = W[36];
Chris@82 1414 T2J = W[37];
Chris@82 1415 T2L = FMA(T2H, T2I, T2J * T2K);
Chris@82 1416 T4j = FNMS(T2J, T2I, T2H * T2K);
Chris@82 1417 }
Chris@82 1418 T4h = T2G - T2L;
Chris@82 1419 T4k = T4i - T4j;
Chris@82 1420 {
Chris@82 1421 E T2O, T2Q, T2N, T2P;
Chris@82 1422 T2O = cr[WS(rs, 27)];
Chris@82 1423 T2Q = ci[WS(rs, 27)];
Chris@82 1424 T2N = W[52];
Chris@82 1425 T2P = W[53];
Chris@82 1426 T2R = FMA(T2N, T2O, T2P * T2Q);
Chris@82 1427 T4d = FNMS(T2P, T2O, T2N * T2Q);
Chris@82 1428 }
Chris@82 1429 {
Chris@82 1430 E T2T, T2V, T2S, T2U;
Chris@82 1431 T2T = cr[WS(rs, 11)];
Chris@82 1432 T2V = ci[WS(rs, 11)];
Chris@82 1433 T2S = W[20];
Chris@82 1434 T2U = W[21];
Chris@82 1435 T2W = FMA(T2S, T2T, T2U * T2V);
Chris@82 1436 T4e = FNMS(T2U, T2T, T2S * T2V);
Chris@82 1437 }
Chris@82 1438 T4c = T2R - T2W;
Chris@82 1439 T4f = T4d - T4e;
Chris@82 1440 {
Chris@82 1441 E T2M, T2X, T68, T69;
Chris@82 1442 T2M = T2G + T2L;
Chris@82 1443 T2X = T2R + T2W;
Chris@82 1444 T2Y = T2M + T2X;
Chris@82 1445 T6f = T2M - T2X;
Chris@82 1446 T68 = T4d + T4e;
Chris@82 1447 T69 = T4i + T4j;
Chris@82 1448 T6a = T68 - T69;
Chris@82 1449 T6P = T69 + T68;
Chris@82 1450 }
Chris@82 1451 {
Chris@82 1452 E T4g, T4l, T4t, T4u;
Chris@82 1453 T4g = T4c + T4f;
Chris@82 1454 T4l = T4h - T4k;
Chris@82 1455 T4m = KP707106781 * (T4g - T4l);
Chris@82 1456 T5e = KP707106781 * (T4l + T4g);
Chris@82 1457 T4t = T4h + T4k;
Chris@82 1458 T4u = T4f - T4c;
Chris@82 1459 T4v = KP707106781 * (T4t - T4u);
Chris@82 1460 T5h = KP707106781 * (T4t + T4u);
Chris@82 1461 }
Chris@82 1462 }
Chris@82 1463 { /* output group: stores cr slots 0, 8, 16, 24 and ci slots 7, 15, 23, 31 (plain sums and differences, no constant multiplies) */
Chris@82 1464 E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
Chris@82 1465 {
Chris@82 1466 E TH, T1s, T72, T79;
Chris@82 1467 TH = Tj + TG;
Chris@82 1468 T1s = T14 + T1r;
Chris@82 1469 T1t = TH + T1s;
Chris@82 1470 T6X = TH - T1s;
Chris@82 1471 T72 = T6F + T6E;
Chris@82 1472 T79 = T73 + T78;
Chris@82 1473 T7a = T72 + T79;
Chris@82 1474 T7c = T79 - T72;
Chris@82 1475 }
Chris@82 1476 {
Chris@82 1477 E T2e, T2Z, T6Y, T6Z;
Chris@82 1478 T2e = T1Q + T2d;
Chris@82 1479 T2Z = T2B + T2Y;
Chris@82 1480 T30 = T2e + T2Z;
Chris@82 1481 T7b = T2Z - T2e;
Chris@82 1482 T6Y = T6O + T6P;
Chris@82 1483 T6Z = T6J + T6K;
Chris@82 1484 T70 = T6Y - T6Z;
Chris@82 1485 T71 = T6Z + T6Y;
Chris@82 1486 }
Chris@82 1487 ci[WS(rs, 15)] = T1t - T30;
Chris@82 1488 cr[WS(rs, 24)] = T7b - T7c;
Chris@82 1489 ci[WS(rs, 23)] = T7b + T7c;
Chris@82 1490 cr[0] = T1t + T30;
Chris@82 1491 cr[WS(rs, 8)] = T6X - T70;
Chris@82 1492 cr[WS(rs, 16)] = T71 - T7a;
Chris@82 1493 ci[WS(rs, 31)] = T71 + T7a;
Chris@82 1494 ci[WS(rs, 7)] = T6X + T70;
Chris@82 1495 }
Chris@82 1496 { /* output group: stores cr slots 3, 7, 11, 15, 19, 23, 27, 31 and ci slots 0, 4, 8, 12, 16, 20, 24, 28 */
Chris@82 1497 E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
Chris@82 1498 E T5n, T4W, T7z;
Chris@82 1499 T4W = KP707106781 * (T4U + T4V);
Chris@82 1500 T4X = T4T - T4W;
Chris@82 1501 T5p = T4T + T4W;
Chris@82 1502 T7z = KP707106781 * (T3a - T3f);
Chris@82 1503 T7D = T7z + T7C;
Chris@82 1504 T7J = T7C - T7z;
Chris@82 1505 {
Chris@82 1506 E T50, T53, T5x, T5y;
Chris@82 1507 T50 = FMA(KP923879532, T4Y, KP382683432 * T4Z);
Chris@82 1508 T53 = FNMS(KP923879532, T52, KP382683432 * T51);
Chris@82 1509 T54 = T50 + T53;
Chris@82 1510 T7y = T50 - T53;
Chris@82 1511 T5x = T5d + T5e;
Chris@82 1512 T5y = T5g + T5h;
Chris@82 1513 T5z = FNMS(KP980785280, T5y, KP195090322 * T5x);
Chris@82 1514 T5D = FMA(KP980785280, T5x, KP195090322 * T5y);
Chris@82 1515 }
Chris@82 1516 {
Chris@82 1517 E T58, T5b, T5q, T5r;
Chris@82 1518 T58 = T56 - T57;
Chris@82 1519 T5b = T59 - T5a;
Chris@82 1520 T5c = FMA(KP831469612, T58, KP555570233 * T5b);
Chris@82 1521 T5m = FNMS(KP831469612, T5b, KP555570233 * T58);
Chris@82 1522 T5q = FNMS(KP382683432, T4Y, KP923879532 * T4Z);
Chris@82 1523 T5r = FMA(KP382683432, T52, KP923879532 * T51);
Chris@82 1524 T5s = T5q + T5r;
Chris@82 1525 T7I = T5r - T5q;
Chris@82 1526 }
Chris@82 1527 {
Chris@82 1528 E T5u, T5v, T5f, T5i;
Chris@82 1529 T5u = T56 + T57;
Chris@82 1530 T5v = T59 + T5a;
Chris@82 1531 T5w = FMA(KP195090322, T5u, KP980785280 * T5v);
Chris@82 1532 T5C = FNMS(KP195090322, T5v, KP980785280 * T5u);
Chris@82 1533 T5f = T5d - T5e;
Chris@82 1534 T5i = T5g - T5h;
Chris@82 1535 T5j = FNMS(KP555570233, T5i, KP831469612 * T5f);
Chris@82 1536 T5n = FMA(KP555570233, T5f, KP831469612 * T5i);
Chris@82 1537 }
Chris@82 1538 {
Chris@82 1539 E T55, T5k, T7H, T7K;
Chris@82 1540 T55 = T4X + T54;
Chris@82 1541 T5k = T5c + T5j;
Chris@82 1542 ci[WS(rs, 12)] = T55 - T5k;
Chris@82 1543 cr[WS(rs, 3)] = T55 + T5k;
Chris@82 1544 T7H = T5n - T5m;
Chris@82 1545 T7K = T7I + T7J;
Chris@82 1546 cr[WS(rs, 19)] = T7H - T7K;
Chris@82 1547 ci[WS(rs, 28)] = T7H + T7K;
Chris@82 1548 }
Chris@82 1549 {
Chris@82 1550 E T7L, T7M, T5l, T5o;
Chris@82 1551 T7L = T5j - T5c;
Chris@82 1552 T7M = T7J - T7I;
Chris@82 1553 cr[WS(rs, 27)] = T7L - T7M;
Chris@82 1554 ci[WS(rs, 20)] = T7L + T7M;
Chris@82 1555 T5l = T4X - T54;
Chris@82 1556 T5o = T5m + T5n;
Chris@82 1557 cr[WS(rs, 11)] = T5l - T5o;
Chris@82 1558 ci[WS(rs, 4)] = T5l + T5o;
Chris@82 1559 }
Chris@82 1560 {
Chris@82 1561 E T5t, T5A, T7x, T7E;
Chris@82 1562 T5t = T5p - T5s;
Chris@82 1563 T5A = T5w + T5z;
Chris@82 1564 ci[WS(rs, 8)] = T5t - T5A;
Chris@82 1565 cr[WS(rs, 7)] = T5t + T5A;
Chris@82 1566 T7x = T5z - T5w;
Chris@82 1567 T7E = T7y + T7D;
Chris@82 1568 cr[WS(rs, 31)] = T7x - T7E;
Chris@82 1569 ci[WS(rs, 16)] = T7x + T7E;
Chris@82 1570 }
Chris@82 1571 {
Chris@82 1572 E T7F, T7G, T5B, T5E;
Chris@82 1573 T7F = T5D - T5C;
Chris@82 1574 T7G = T7D - T7y;
Chris@82 1575 cr[WS(rs, 23)] = T7F - T7G;
Chris@82 1576 ci[WS(rs, 24)] = T7F + T7G;
Chris@82 1577 T5B = T5p + T5s;
Chris@82 1578 T5E = T5C + T5D;
Chris@82 1579 cr[WS(rs, 15)] = T5B - T5E;
Chris@82 1580 ci[0] = T5B + T5E;
Chris@82 1581 }
Chris@82 1582 }
Chris@82 1583 { /* output group: stores cr slots 4, 12, 20, 28 and ci slots 3, 11, 19, 27 (only KP707106781 is used here) */
Chris@82 1584 E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
Chris@82 1585 {
Chris@82 1586 E T6D, T6G, T7e, T7f;
Chris@82 1587 T6D = Tj - TG;
Chris@82 1588 T6G = T6E - T6F;
Chris@82 1589 T6H = T6D - T6G;
Chris@82 1590 T6T = T6D + T6G;
Chris@82 1591 T7e = T14 - T1r;
Chris@82 1592 T7f = T78 - T73;
Chris@82 1593 T7g = T7e + T7f;
Chris@82 1594 T7i = T7f - T7e;
Chris@82 1595 }
Chris@82 1596 {
Chris@82 1597 E T6I, T6L, T6N, T6Q;
Chris@82 1598 T6I = T1Q - T2d;
Chris@82 1599 T6L = T6J - T6K;
Chris@82 1600 T6M = T6I + T6L;
Chris@82 1601 T6U = T6I - T6L;
Chris@82 1602 T6N = T2B - T2Y;
Chris@82 1603 T6Q = T6O - T6P;
Chris@82 1604 T6R = T6N - T6Q;
Chris@82 1605 T6V = T6N + T6Q;
Chris@82 1606 }
Chris@82 1607 {
Chris@82 1608 E T6S, T7h, T6W, T7d;
Chris@82 1609 T6S = KP707106781 * (T6M + T6R);
Chris@82 1610 ci[WS(rs, 11)] = T6H - T6S;
Chris@82 1611 cr[WS(rs, 4)] = T6H + T6S;
Chris@82 1612 T7h = KP707106781 * (T6V - T6U);
Chris@82 1613 cr[WS(rs, 20)] = T7h - T7i;
Chris@82 1614 ci[WS(rs, 27)] = T7h + T7i;
Chris@82 1615 T6W = KP707106781 * (T6U + T6V);
Chris@82 1616 cr[WS(rs, 12)] = T6T - T6W;
Chris@82 1617 ci[WS(rs, 3)] = T6T + T6W;
Chris@82 1618 T7d = KP707106781 * (T6R - T6M);
Chris@82 1619 cr[WS(rs, 28)] = T7d - T7g;
Chris@82 1620 ci[WS(rs, 19)] = T7d + T7g;
Chris@82 1621 }
Chris@82 1622 }
Chris@82 1623 { /* output group: stores cr slots 2, 6, 10, 14, 18, 22, 26, 30 and ci slots 1, 5, 9, 13, 17, 21, 25, 29 */
Chris@82 1624 E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
Chris@82 1625 E T6l;
Chris@82 1626 {
Chris@82 1627 E T5O, T5T, T60, T65;
Chris@82 1628 T5J = T5F - T5I;
Chris@82 1629 T7n = T7l + T7m;
Chris@82 1630 T7t = T7m - T7l;
Chris@82 1631 T6n = T5F + T5I;
Chris@82 1632 T5O = T5K + T5N;
Chris@82 1633 T5T = T5P - T5S;
Chris@82 1634 T5U = KP707106781 * (T5O + T5T);
Chris@82 1635 T7k = KP707106781 * (T5O - T5T);
Chris@82 1636 {
Chris@82 1637 E T6v, T6w, T6o, T6p;
Chris@82 1638 T6v = T6e + T6f;
Chris@82 1639 T6w = T67 + T6a;
Chris@82 1640 T6x = FMA(KP382683432, T6v, KP923879532 * T6w);
Chris@82 1641 T6B = FNMS(KP923879532, T6v, KP382683432 * T6w);
Chris@82 1642 T6o = T5K - T5N;
Chris@82 1643 T6p = T5P + T5S;
Chris@82 1644 T6q = KP707106781 * (T6o + T6p);
Chris@82 1645 T7s = KP707106781 * (T6p - T6o);
Chris@82 1646 }
Chris@82 1647 T60 = T5Y - T5Z;
Chris@82 1648 T65 = T61 - T64;
Chris@82 1649 T66 = FMA(KP382683432, T60, KP923879532 * T65);
Chris@82 1650 T6k = FNMS(KP923879532, T60, KP382683432 * T65);
Chris@82 1651 {
Chris@82 1652 E T6s, T6t, T6b, T6g;
Chris@82 1653 T6s = T61 + T64;
Chris@82 1654 T6t = T5Y + T5Z;
Chris@82 1655 T6u = FNMS(KP382683432, T6t, KP923879532 * T6s);
Chris@82 1656 T6A = FMA(KP923879532, T6t, KP382683432 * T6s);
Chris@82 1657 T6b = T67 - T6a;
Chris@82 1658 T6g = T6e - T6f;
Chris@82 1659 T6h = FNMS(KP382683432, T6g, KP923879532 * T6b);
Chris@82 1660 T6l = FMA(KP923879532, T6g, KP382683432 * T6b);
Chris@82 1661 }
Chris@82 1662 }
Chris@82 1663 {
Chris@82 1664 E T5V, T6i, T7r, T7u;
Chris@82 1665 T5V = T5J + T5U;
Chris@82 1666 T6i = T66 + T6h;
Chris@82 1667 ci[WS(rs, 13)] = T5V - T6i;
Chris@82 1668 cr[WS(rs, 2)] = T5V + T6i;
Chris@82 1669 T7r = T6l - T6k;
Chris@82 1670 T7u = T7s + T7t;
Chris@82 1671 cr[WS(rs, 18)] = T7r - T7u;
Chris@82 1672 ci[WS(rs, 29)] = T7r + T7u;
Chris@82 1673 }
Chris@82 1674 {
Chris@82 1675 E T7v, T7w, T6j, T6m;
Chris@82 1676 T7v = T6h - T66;
Chris@82 1677 T7w = T7t - T7s;
Chris@82 1678 cr[WS(rs, 26)] = T7v - T7w;
Chris@82 1679 ci[WS(rs, 21)] = T7v + T7w;
Chris@82 1680 T6j = T5J - T5U;
Chris@82 1681 T6m = T6k + T6l;
Chris@82 1682 cr[WS(rs, 10)] = T6j - T6m;
Chris@82 1683 ci[WS(rs, 5)] = T6j + T6m;
Chris@82 1684 }
Chris@82 1685 {
Chris@82 1686 E T6r, T6y, T7j, T7o;
Chris@82 1687 T6r = T6n + T6q;
Chris@82 1688 T6y = T6u + T6x;
Chris@82 1689 cr[WS(rs, 14)] = T6r - T6y;
Chris@82 1690 ci[WS(rs, 1)] = T6r + T6y;
Chris@82 1691 T7j = T6B - T6A;
Chris@82 1692 T7o = T7k + T7n;
Chris@82 1693 cr[WS(rs, 30)] = T7j - T7o;
Chris@82 1694 ci[WS(rs, 17)] = T7j + T7o;
Chris@82 1695 }
Chris@82 1696 {
Chris@82 1697 E T7p, T7q, T6z, T6C;
Chris@82 1698 T7p = T6x - T6u;
Chris@82 1699 T7q = T7n - T7k;
Chris@82 1700 cr[WS(rs, 22)] = T7p - T7q;
Chris@82 1701 ci[WS(rs, 25)] = T7p + T7q;
Chris@82 1702 T6z = T6n - T6q;
Chris@82 1703 T6C = T6A + T6B;
Chris@82 1704 ci[WS(rs, 9)] = T6z - T6C;
Chris@82 1705 cr[WS(rs, 6)] = T6z + T6C;
Chris@82 1706 }
Chris@82 1707 }
Chris@82 1708 { /* output group: stores cr slots 1, 5, 9, 13, 17, 21, 25, 29 and ci slots 2, 6, 10, 14, 18, 22, 26, 30 */
Chris@82 1709 E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
Chris@82 1710 E T4B, T3g, T7P;
Chris@82 1711 T3g = KP707106781 * (T3a + T3f);
Chris@82 1712 T3h = T35 - T3g;
Chris@82 1713 T4D = T35 + T3g;
Chris@82 1714 T7P = KP707106781 * (T4V - T4U);
Chris@82 1715 T7R = T7P + T7Q;
Chris@82 1716 T7X = T7Q - T7P;
Chris@82 1717 {
Chris@82 1718 E T3s, T3D, T4L, T4M;
Chris@82 1719 T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
Chris@82 1720 T3D = FMA(KP923879532, T3x, KP382683432 * T3C);
Chris@82 1721 T3E = T3s + T3D;
Chris@82 1722 T7O = T3D - T3s;
Chris@82 1723 T4L = T4s + T4v;
Chris@82 1724 T4M = T4b + T4m;
Chris@82 1725 T4N = FNMS(KP195090322, T4M, KP980785280 * T4L);
Chris@82 1726 T4R = FMA(KP980785280, T4M, KP195090322 * T4L);
Chris@82 1727 }
Chris@82 1728 {
Chris@82 1729 E T3W, T45, T4E, T4F;
Chris@82 1730 T3W = T3K - T3V;
Chris@82 1731 T45 = T41 - T44;
Chris@82 1732 T46 = FNMS(KP555570233, T45, KP831469612 * T3W);
Chris@82 1733 T4A = FMA(KP831469612, T45, KP555570233 * T3W);
Chris@82 1734 T4E = FMA(KP382683432, T3r, KP923879532 * T3m);
Chris@82 1735 T4F = FNMS(KP382683432, T3x, KP923879532 * T3C);
Chris@82 1736 T4G = T4E + T4F;
Chris@82 1737 T7W = T4E - T4F;
Chris@82 1738 }
Chris@82 1739 {
Chris@82 1740 E T4I, T4J, T4n, T4w;
Chris@82 1741 T4I = T41 + T44;
Chris@82 1742 T4J = T3K + T3V;
Chris@82 1743 T4K = FMA(KP195090322, T4I, KP980785280 * T4J);
Chris@82 1744 T4Q = FNMS(KP980785280, T4I, KP195090322 * T4J);
Chris@82 1745 T4n = T4b - T4m;
Chris@82 1746 T4w = T4s - T4v;
Chris@82 1747 T4x = FMA(KP555570233, T4n, KP831469612 * T4w);
Chris@82 1748 T4B = FNMS(KP831469612, T4n, KP555570233 * T4w);
Chris@82 1749 }
Chris@82 1750 {
Chris@82 1751 E T3F, T4y, T7V, T7Y;
Chris@82 1752 T3F = T3h + T3E;
Chris@82 1753 T4y = T46 + T4x;
Chris@82 1754 cr[WS(rs, 13)] = T3F - T4y;
Chris@82 1755 ci[WS(rs, 2)] = T3F + T4y;
Chris@82 1756 T7V = T4B - T4A;
Chris@82 1757 T7Y = T7W + T7X;
Chris@82 1758 cr[WS(rs, 29)] = T7V - T7Y;
Chris@82 1759 ci[WS(rs, 18)] = T7V + T7Y;
Chris@82 1760 }
Chris@82 1761 {
Chris@82 1762 E T7Z, T80, T4z, T4C;
Chris@82 1763 T7Z = T4x - T46;
Chris@82 1764 T80 = T7X - T7W;
Chris@82 1765 cr[WS(rs, 21)] = T7Z - T80;
Chris@82 1766 ci[WS(rs, 26)] = T7Z + T80;
Chris@82 1767 T4z = T3h - T3E;
Chris@82 1768 T4C = T4A + T4B;
Chris@82 1769 ci[WS(rs, 10)] = T4z - T4C;
Chris@82 1770 cr[WS(rs, 5)] = T4z + T4C;
Chris@82 1771 }
Chris@82 1772 {
Chris@82 1773 E T4H, T4O, T7N, T7S;
Chris@82 1774 T4H = T4D + T4G;
Chris@82 1775 T4O = T4K + T4N;
Chris@82 1776 ci[WS(rs, 14)] = T4H - T4O;
Chris@82 1777 cr[WS(rs, 1)] = T4H + T4O;
Chris@82 1778 T7N = T4R - T4Q;
Chris@82 1779 T7S = T7O + T7R;
Chris@82 1780 cr[WS(rs, 17)] = T7N - T7S;
Chris@82 1781 ci[WS(rs, 30)] = T7N + T7S;
Chris@82 1782 }
Chris@82 1783 {
Chris@82 1784 E T7T, T7U, T4P, T4S;
Chris@82 1785 T7T = T4N - T4K;
Chris@82 1786 T7U = T7R - T7O;
Chris@82 1787 cr[WS(rs, 25)] = T7T - T7U;
Chris@82 1788 ci[WS(rs, 22)] = T7T + T7U;
Chris@82 1789 T4P = T4D - T4G;
Chris@82 1790 T4S = T4Q + T4R;
Chris@82 1791 cr[WS(rs, 9)] = T4P - T4S;
Chris@82 1792 ci[WS(rs, 6)] = T4P + T4S;
Chris@82 1793 }
Chris@82 1794 }
Chris@82 1795 }
Chris@82 1796 }
Chris@82 1797 }
Chris@82 1798
Chris@82 1799 static const tw_instr twinstr[] = { /* twiddle instruction list for the non-FMA hf_32; referenced by desc below */
Chris@82 1800 {TW_FULL, 1, 32}, /* presumably requests the full twiddle set for size 32 (hf_32 consumes 62 W values per iteration) -- TW_FULL is defined outside this file, confirm there */
Chris@82 1801 {TW_NEXT, 1, 0} /* presumably the list terminator -- TW_NEXT is defined outside this file, confirm there */
Chris@82 1802 };
Chris@82 1803
Chris@82 1804 static const hc2hc_desc desc = { 32, "hf_32", twinstr, &GENUS, {340, 114, 94, 0} }; /* descriptor passed at registration: 32 matches "-n 32" and "hf_32" matches "-name hf_32" on the generator command line; {340, 114, 94, 0} repeats the add / multiply / fused-multiply-add counts from this variant's header comment; GENUS is defined outside this file */
Chris@82 1805
Chris@82 1806 void X(codelet_hf_32) (planner *p) { /* registration entry point for the non-FMA codelet; X() is a project macro (presumably a name-prefixing macro -- confirm) */
Chris@82 1807 X(khc2hc_register) (p, hf_32, &desc); /* passes planner p, the hf_32 kernel and its descriptor to the registration routine */
Chris@82 1808 }
Chris@82 1809 #endif