annotate src/fftw-3.3.8/dft/scalar/codelets/t1_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:15 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name t1_32 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@82 33 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t1_32 -- size-32 twiddle codelet, FMA variant (this copy is the one compiled
 * when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Works in place on split real/imaginary arrays.  For every m in [mb, me):
 *   - reads the 32 complex inputs ri[WS(rs, k)], ii[WS(rs, k)], k = 0..31;
 *   - input 0 is used as is; every input k >= 1 is first combined with the
 *     twiddle pair (W[2k-2], W[2k-1]) as
 *         re = W[2k-2] * ri_k + W[2k-1] * ii_k
 *         im = W[2k-2] * ii_k - W[2k-1] * ri_k
 *     (assuming FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b; both
 *     macros are defined outside this file -- TODO confirm);
 *   - combines the 32 values with add/subtract steps and the KP constants
 *     below, and stores the 32 results back into the same ri/ii slots.
 * W is offset by mb * 62 before the first iteration; after each iteration
 * ri and ii advance by ms and W advances by 62 (= 2 * 31) entries.
 *
 * Parameters:
 *   ri, ii   real and imaginary parts; read and overwritten
 *   W        twiddle table; read only
 *   rs       stride handed to WS() to locate element k
 *   mb, me   half-open range of iterations (loop runs while m < me)
 *   ms       per-iteration increment applied to ri and ii
 */
Chris@82 37 static void t1_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/*
 * Constants; each name carries the leading digits of its value.  Numerically:
 * KP980785280 = cos(pi/16), KP831469612 = cos(3*pi/16),
 * KP198912367 = tan(pi/16), KP668178637 = tan(3*pi/16),
 * KP923879532 = cos(pi/8),  KP414213562 = tan(pi/8),
 * KP707106781 = sqrt(1/2).
 */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 41 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/* One size-32 transform per iteration.  MAKE_VOLATILE_STRIDE is a project
 * macro whose effect is not visible from this file. */
Chris@82 48 for (m = mb, W = W + (mb * 62); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
/* Values produced by the load stages below and consumed by the store stages. */
Chris@82 49 E T8, T8x, T3w, T87, Tl, T8y, T3B, T83, Tz, T6F, T3J, T5T, TM, T6G, T3Q;
Chris@82 50 E T5U, T11, T1e, T6M, T6J, T6K, T6L, T3Z, T5X, T46, T5Y, T1s, T1F, T6O, T6P;
Chris@82 51 E T6Q, T6R, T4e, T60, T4l, T61, T32, T7b, T78, T7N, T54, T6f, T5r, T6c, T29;
Chris@82 52 E T70, T6X, T7I, T4v, T68, T4S, T65, T3t, T79, T7e, T7O, T5b, T5s, T5i, T5t;
Chris@82 53 E T2A, T6Y, T73, T7J, T4C, T4T, T4J, T4U;
/* Load input 0 (no twiddle) and input 16 (twiddle W[30], W[31]); form their
 * sum and difference. */
Chris@82 54 {
Chris@82 55 E T1, T86, T3, T6, T4, T84, T2, T7, T85, T5;
Chris@82 56 T1 = ri[0];
Chris@82 57 T86 = ii[0];
Chris@82 58 T3 = ri[WS(rs, 16)];
Chris@82 59 T6 = ii[WS(rs, 16)];
Chris@82 60 T2 = W[30];
Chris@82 61 T4 = T2 * T3;
Chris@82 62 T84 = T2 * T6;
Chris@82 63 T5 = W[31];
Chris@82 64 T7 = FMA(T5, T6, T4);
Chris@82 65 T85 = FNMS(T5, T3, T84);
Chris@82 66 T8 = T1 + T7;
Chris@82 67 T8x = T86 - T85;
Chris@82 68 T3w = T1 - T7;
Chris@82 69 T87 = T85 + T86;
Chris@82 70 }
/* Load and twiddle inputs 8 and 24; form their sum and difference. */
Chris@82 71 {
Chris@82 72 E Ta, Td, Tb, T3x, Tg, Tj, Th, T3z, T9, Tf;
Chris@82 73 Ta = ri[WS(rs, 8)];
Chris@82 74 Td = ii[WS(rs, 8)];
Chris@82 75 T9 = W[14];
Chris@82 76 Tb = T9 * Ta;
Chris@82 77 T3x = T9 * Td;
Chris@82 78 Tg = ri[WS(rs, 24)];
Chris@82 79 Tj = ii[WS(rs, 24)];
Chris@82 80 Tf = W[46];
Chris@82 81 Th = Tf * Tg;
Chris@82 82 T3z = Tf * Tj;
Chris@82 83 {
Chris@82 84 E Te, T3y, Tk, T3A, Tc, Ti;
Chris@82 85 Tc = W[15];
Chris@82 86 Te = FMA(Tc, Td, Tb);
Chris@82 87 T3y = FNMS(Tc, Ta, T3x);
Chris@82 88 Ti = W[47];
Chris@82 89 Tk = FMA(Ti, Tj, Th);
Chris@82 90 T3A = FNMS(Ti, Tg, T3z);
Chris@82 91 Tl = Te + Tk;
Chris@82 92 T8y = Te - Tk;
Chris@82 93 T3B = T3y - T3A;
Chris@82 94 T83 = T3y + T3A;
Chris@82 95 }
Chris@82 96 }
/* Load and twiddle inputs 4 and 20. */
Chris@82 97 {
Chris@82 98 E Ts, T3F, Ty, T3H, T3D, T3I;
Chris@82 99 {
Chris@82 100 E To, Tr, Tp, T3E, Tn, Tq;
Chris@82 101 To = ri[WS(rs, 4)];
Chris@82 102 Tr = ii[WS(rs, 4)];
Chris@82 103 Tn = W[6];
Chris@82 104 Tp = Tn * To;
Chris@82 105 T3E = Tn * Tr;
Chris@82 106 Tq = W[7];
Chris@82 107 Ts = FMA(Tq, Tr, Tp);
Chris@82 108 T3F = FNMS(Tq, To, T3E);
Chris@82 109 }
Chris@82 110 {
Chris@82 111 E Tu, Tx, Tv, T3G, Tt, Tw;
Chris@82 112 Tu = ri[WS(rs, 20)];
Chris@82 113 Tx = ii[WS(rs, 20)];
Chris@82 114 Tt = W[38];
Chris@82 115 Tv = Tt * Tu;
Chris@82 116 T3G = Tt * Tx;
Chris@82 117 Tw = W[39];
Chris@82 118 Ty = FMA(Tw, Tx, Tv);
Chris@82 119 T3H = FNMS(Tw, Tu, T3G);
Chris@82 120 }
Chris@82 121 Tz = Ts + Ty;
Chris@82 122 T6F = T3F + T3H;
Chris@82 123 T3D = Ts - Ty;
Chris@82 124 T3I = T3F - T3H;
Chris@82 125 T3J = T3D + T3I;
Chris@82 126 T5T = T3I - T3D;
Chris@82 127 }
/* Load and twiddle inputs 28 and 12. */
Chris@82 128 {
Chris@82 129 E TF, T3M, TL, T3O, T3K, T3P;
Chris@82 130 {
Chris@82 131 E TB, TE, TC, T3L, TA, TD;
Chris@82 132 TB = ri[WS(rs, 28)];
Chris@82 133 TE = ii[WS(rs, 28)];
Chris@82 134 TA = W[54];
Chris@82 135 TC = TA * TB;
Chris@82 136 T3L = TA * TE;
Chris@82 137 TD = W[55];
Chris@82 138 TF = FMA(TD, TE, TC);
Chris@82 139 T3M = FNMS(TD, TB, T3L);
Chris@82 140 }
Chris@82 141 {
Chris@82 142 E TH, TK, TI, T3N, TG, TJ;
Chris@82 143 TH = ri[WS(rs, 12)];
Chris@82 144 TK = ii[WS(rs, 12)];
Chris@82 145 TG = W[22];
Chris@82 146 TI = TG * TH;
Chris@82 147 T3N = TG * TK;
Chris@82 148 TJ = W[23];
Chris@82 149 TL = FMA(TJ, TK, TI);
Chris@82 150 T3O = FNMS(TJ, TH, T3N);
Chris@82 151 }
Chris@82 152 TM = TF + TL;
Chris@82 153 T6G = T3M + T3O;
Chris@82 154 T3K = TF - TL;
Chris@82 155 T3P = T3M - T3O;
Chris@82 156 T3Q = T3K - T3P;
Chris@82 157 T5U = T3K + T3P;
Chris@82 158 }
/* Load and twiddle inputs 2, 26, 18 and 10, then combine the four. */
Chris@82 159 {
Chris@82 160 E TU, T3U, T1d, T44, T10, T3W, T17, T42;
Chris@82 161 {
Chris@82 162 E TQ, TT, TR, T3T, TP, TS;
Chris@82 163 TQ = ri[WS(rs, 2)];
Chris@82 164 TT = ii[WS(rs, 2)];
Chris@82 165 TP = W[2];
Chris@82 166 TR = TP * TQ;
Chris@82 167 T3T = TP * TT;
Chris@82 168 TS = W[3];
Chris@82 169 TU = FMA(TS, TT, TR);
Chris@82 170 T3U = FNMS(TS, TQ, T3T);
Chris@82 171 }
Chris@82 172 {
Chris@82 173 E T19, T1c, T1a, T43, T18, T1b;
Chris@82 174 T19 = ri[WS(rs, 26)];
Chris@82 175 T1c = ii[WS(rs, 26)];
Chris@82 176 T18 = W[50];
Chris@82 177 T1a = T18 * T19;
Chris@82 178 T43 = T18 * T1c;
Chris@82 179 T1b = W[51];
Chris@82 180 T1d = FMA(T1b, T1c, T1a);
Chris@82 181 T44 = FNMS(T1b, T19, T43);
Chris@82 182 }
Chris@82 183 {
Chris@82 184 E TW, TZ, TX, T3V, TV, TY;
Chris@82 185 TW = ri[WS(rs, 18)];
Chris@82 186 TZ = ii[WS(rs, 18)];
Chris@82 187 TV = W[34];
Chris@82 188 TX = TV * TW;
Chris@82 189 T3V = TV * TZ;
Chris@82 190 TY = W[35];
Chris@82 191 T10 = FMA(TY, TZ, TX);
Chris@82 192 T3W = FNMS(TY, TW, T3V);
Chris@82 193 }
Chris@82 194 {
Chris@82 195 E T13, T16, T14, T41, T12, T15;
Chris@82 196 T13 = ri[WS(rs, 10)];
Chris@82 197 T16 = ii[WS(rs, 10)];
Chris@82 198 T12 = W[18];
Chris@82 199 T14 = T12 * T13;
Chris@82 200 T41 = T12 * T16;
Chris@82 201 T15 = W[19];
Chris@82 202 T17 = FMA(T15, T16, T14);
Chris@82 203 T42 = FNMS(T15, T13, T41);
Chris@82 204 }
Chris@82 205 T11 = TU + T10;
Chris@82 206 T1e = T17 + T1d;
Chris@82 207 T6M = T11 - T1e;
Chris@82 208 T6J = T3U + T3W;
Chris@82 209 T6K = T42 + T44;
Chris@82 210 T6L = T6J - T6K;
Chris@82 211 {
Chris@82 212 E T3X, T3Y, T40, T45;
Chris@82 213 T3X = T3U - T3W;
Chris@82 214 T3Y = T17 - T1d;
Chris@82 215 T3Z = T3X - T3Y;
Chris@82 216 T5X = T3X + T3Y;
Chris@82 217 T40 = TU - T10;
Chris@82 218 T45 = T42 - T44;
Chris@82 219 T46 = T40 + T45;
Chris@82 220 T5Y = T40 - T45;
Chris@82 221 }
Chris@82 222 }
/* Load and twiddle inputs 30, 22, 14 and 6, then combine the four. */
Chris@82 223 {
Chris@82 224 E T1l, T49, T1E, T4j, T1r, T4b, T1y, T4h;
Chris@82 225 {
Chris@82 226 E T1h, T1k, T1i, T48, T1g, T1j;
Chris@82 227 T1h = ri[WS(rs, 30)];
Chris@82 228 T1k = ii[WS(rs, 30)];
Chris@82 229 T1g = W[58];
Chris@82 230 T1i = T1g * T1h;
Chris@82 231 T48 = T1g * T1k;
Chris@82 232 T1j = W[59];
Chris@82 233 T1l = FMA(T1j, T1k, T1i);
Chris@82 234 T49 = FNMS(T1j, T1h, T48);
Chris@82 235 }
Chris@82 236 {
Chris@82 237 E T1A, T1D, T1B, T4i, T1z, T1C;
Chris@82 238 T1A = ri[WS(rs, 22)];
Chris@82 239 T1D = ii[WS(rs, 22)];
Chris@82 240 T1z = W[42];
Chris@82 241 T1B = T1z * T1A;
Chris@82 242 T4i = T1z * T1D;
Chris@82 243 T1C = W[43];
Chris@82 244 T1E = FMA(T1C, T1D, T1B);
Chris@82 245 T4j = FNMS(T1C, T1A, T4i);
Chris@82 246 }
Chris@82 247 {
Chris@82 248 E T1n, T1q, T1o, T4a, T1m, T1p;
Chris@82 249 T1n = ri[WS(rs, 14)];
Chris@82 250 T1q = ii[WS(rs, 14)];
Chris@82 251 T1m = W[26];
Chris@82 252 T1o = T1m * T1n;
Chris@82 253 T4a = T1m * T1q;
Chris@82 254 T1p = W[27];
Chris@82 255 T1r = FMA(T1p, T1q, T1o);
Chris@82 256 T4b = FNMS(T1p, T1n, T4a);
Chris@82 257 }
Chris@82 258 {
Chris@82 259 E T1u, T1x, T1v, T4g, T1t, T1w;
Chris@82 260 T1u = ri[WS(rs, 6)];
Chris@82 261 T1x = ii[WS(rs, 6)];
Chris@82 262 T1t = W[10];
Chris@82 263 T1v = T1t * T1u;
Chris@82 264 T4g = T1t * T1x;
Chris@82 265 T1w = W[11];
Chris@82 266 T1y = FMA(T1w, T1x, T1v);
Chris@82 267 T4h = FNMS(T1w, T1u, T4g);
Chris@82 268 }
Chris@82 269 T1s = T1l + T1r;
Chris@82 270 T1F = T1y + T1E;
Chris@82 271 T6O = T1s - T1F;
Chris@82 272 T6P = T49 + T4b;
Chris@82 273 T6Q = T4h + T4j;
Chris@82 274 T6R = T6P - T6Q;
Chris@82 275 {
Chris@82 276 E T4c, T4d, T4f, T4k;
Chris@82 277 T4c = T49 - T4b;
Chris@82 278 T4d = T1y - T1E;
Chris@82 279 T4e = T4c - T4d;
Chris@82 280 T60 = T4c + T4d;
Chris@82 281 T4f = T1l - T1r;
Chris@82 282 T4k = T4h - T4j;
Chris@82 283 T4l = T4f + T4k;
Chris@82 284 T61 = T4f - T4k;
Chris@82 285 }
Chris@82 286 }
/* Load and twiddle inputs 31, 23, 15 and 7, then combine the four. */
Chris@82 287 {
Chris@82 288 E T2H, T4Z, T30, T5p, T2N, T51, T2U, T5n;
Chris@82 289 {
Chris@82 290 E T2D, T2G, T2E, T4Y, T2C, T2F;
Chris@82 291 T2D = ri[WS(rs, 31)];
Chris@82 292 T2G = ii[WS(rs, 31)];
Chris@82 293 T2C = W[60];
Chris@82 294 T2E = T2C * T2D;
Chris@82 295 T4Y = T2C * T2G;
Chris@82 296 T2F = W[61];
Chris@82 297 T2H = FMA(T2F, T2G, T2E);
Chris@82 298 T4Z = FNMS(T2F, T2D, T4Y);
Chris@82 299 }
Chris@82 300 {
Chris@82 301 E T2W, T2Z, T2X, T5o, T2V, T2Y;
Chris@82 302 T2W = ri[WS(rs, 23)];
Chris@82 303 T2Z = ii[WS(rs, 23)];
Chris@82 304 T2V = W[44];
Chris@82 305 T2X = T2V * T2W;
Chris@82 306 T5o = T2V * T2Z;
Chris@82 307 T2Y = W[45];
Chris@82 308 T30 = FMA(T2Y, T2Z, T2X);
Chris@82 309 T5p = FNMS(T2Y, T2W, T5o);
Chris@82 310 }
Chris@82 311 {
Chris@82 312 E T2J, T2M, T2K, T50, T2I, T2L;
Chris@82 313 T2J = ri[WS(rs, 15)];
Chris@82 314 T2M = ii[WS(rs, 15)];
Chris@82 315 T2I = W[28];
Chris@82 316 T2K = T2I * T2J;
Chris@82 317 T50 = T2I * T2M;
Chris@82 318 T2L = W[29];
Chris@82 319 T2N = FMA(T2L, T2M, T2K);
Chris@82 320 T51 = FNMS(T2L, T2J, T50);
Chris@82 321 }
Chris@82 322 {
Chris@82 323 E T2Q, T2T, T2R, T5m, T2P, T2S;
Chris@82 324 T2Q = ri[WS(rs, 7)];
Chris@82 325 T2T = ii[WS(rs, 7)];
Chris@82 326 T2P = W[12];
Chris@82 327 T2R = T2P * T2Q;
Chris@82 328 T5m = T2P * T2T;
Chris@82 329 T2S = W[13];
Chris@82 330 T2U = FMA(T2S, T2T, T2R);
Chris@82 331 T5n = FNMS(T2S, T2Q, T5m);
Chris@82 332 }
Chris@82 333 {
Chris@82 334 E T2O, T31, T76, T77;
Chris@82 335 T2O = T2H + T2N;
Chris@82 336 T31 = T2U + T30;
Chris@82 337 T32 = T2O + T31;
Chris@82 338 T7b = T2O - T31;
Chris@82 339 T76 = T4Z + T51;
Chris@82 340 T77 = T5n + T5p;
Chris@82 341 T78 = T76 - T77;
Chris@82 342 T7N = T76 + T77;
Chris@82 343 }
Chris@82 344 {
Chris@82 345 E T52, T53, T5l, T5q;
Chris@82 346 T52 = T4Z - T51;
Chris@82 347 T53 = T2U - T30;
Chris@82 348 T54 = T52 - T53;
Chris@82 349 T6f = T52 + T53;
Chris@82 350 T5l = T2H - T2N;
Chris@82 351 T5q = T5n - T5p;
Chris@82 352 T5r = T5l + T5q;
Chris@82 353 T6c = T5l - T5q;
Chris@82 354 }
Chris@82 355 }
/* Load and twiddle inputs 1, 25, 17 and 9, then combine the four. */
Chris@82 356 {
Chris@82 357 E T1O, T4q, T27, T4Q, T1U, T4s, T21, T4O;
Chris@82 358 {
Chris@82 359 E T1K, T1N, T1L, T4p, T1J, T1M;
Chris@82 360 T1K = ri[WS(rs, 1)];
Chris@82 361 T1N = ii[WS(rs, 1)];
Chris@82 362 T1J = W[0];
Chris@82 363 T1L = T1J * T1K;
Chris@82 364 T4p = T1J * T1N;
Chris@82 365 T1M = W[1];
Chris@82 366 T1O = FMA(T1M, T1N, T1L);
Chris@82 367 T4q = FNMS(T1M, T1K, T4p);
Chris@82 368 }
Chris@82 369 {
Chris@82 370 E T23, T26, T24, T4P, T22, T25;
Chris@82 371 T23 = ri[WS(rs, 25)];
Chris@82 372 T26 = ii[WS(rs, 25)];
Chris@82 373 T22 = W[48];
Chris@82 374 T24 = T22 * T23;
Chris@82 375 T4P = T22 * T26;
Chris@82 376 T25 = W[49];
Chris@82 377 T27 = FMA(T25, T26, T24);
Chris@82 378 T4Q = FNMS(T25, T23, T4P);
Chris@82 379 }
Chris@82 380 {
Chris@82 381 E T1Q, T1T, T1R, T4r, T1P, T1S;
Chris@82 382 T1Q = ri[WS(rs, 17)];
Chris@82 383 T1T = ii[WS(rs, 17)];
Chris@82 384 T1P = W[32];
Chris@82 385 T1R = T1P * T1Q;
Chris@82 386 T4r = T1P * T1T;
Chris@82 387 T1S = W[33];
Chris@82 388 T1U = FMA(T1S, T1T, T1R);
Chris@82 389 T4s = FNMS(T1S, T1Q, T4r);
Chris@82 390 }
Chris@82 391 {
Chris@82 392 E T1X, T20, T1Y, T4N, T1W, T1Z;
Chris@82 393 T1X = ri[WS(rs, 9)];
Chris@82 394 T20 = ii[WS(rs, 9)];
Chris@82 395 T1W = W[16];
Chris@82 396 T1Y = T1W * T1X;
Chris@82 397 T4N = T1W * T20;
Chris@82 398 T1Z = W[17];
Chris@82 399 T21 = FMA(T1Z, T20, T1Y);
Chris@82 400 T4O = FNMS(T1Z, T1X, T4N);
Chris@82 401 }
Chris@82 402 {
Chris@82 403 E T1V, T28, T6V, T6W;
Chris@82 404 T1V = T1O + T1U;
Chris@82 405 T28 = T21 + T27;
Chris@82 406 T29 = T1V + T28;
Chris@82 407 T70 = T1V - T28;
Chris@82 408 T6V = T4q + T4s;
Chris@82 409 T6W = T4O + T4Q;
Chris@82 410 T6X = T6V - T6W;
Chris@82 411 T7I = T6V + T6W;
Chris@82 412 }
Chris@82 413 {
Chris@82 414 E T4t, T4u, T4M, T4R;
Chris@82 415 T4t = T4q - T4s;
Chris@82 416 T4u = T21 - T27;
Chris@82 417 T4v = T4t - T4u;
Chris@82 418 T68 = T4t + T4u;
Chris@82 419 T4M = T1O - T1U;
Chris@82 420 T4R = T4O - T4Q;
Chris@82 421 T4S = T4M + T4R;
Chris@82 422 T65 = T4M - T4R;
Chris@82 423 }
Chris@82 424 }
/* Load and twiddle inputs 3, 11, 19 and 27, then combine the four. */
Chris@82 425 {
Chris@82 426 E T38, T56, T3r, T5g, T3e, T58, T3l, T5e;
Chris@82 427 {
Chris@82 428 E T34, T37, T35, T55, T33, T36;
Chris@82 429 T34 = ri[WS(rs, 3)];
Chris@82 430 T37 = ii[WS(rs, 3)];
Chris@82 431 T33 = W[4];
Chris@82 432 T35 = T33 * T34;
Chris@82 433 T55 = T33 * T37;
Chris@82 434 T36 = W[5];
Chris@82 435 T38 = FMA(T36, T37, T35);
Chris@82 436 T56 = FNMS(T36, T34, T55);
Chris@82 437 }
Chris@82 438 {
Chris@82 439 E T3n, T3q, T3o, T5f, T3m, T3p;
Chris@82 440 T3n = ri[WS(rs, 11)];
Chris@82 441 T3q = ii[WS(rs, 11)];
Chris@82 442 T3m = W[20];
Chris@82 443 T3o = T3m * T3n;
Chris@82 444 T5f = T3m * T3q;
Chris@82 445 T3p = W[21];
Chris@82 446 T3r = FMA(T3p, T3q, T3o);
Chris@82 447 T5g = FNMS(T3p, T3n, T5f);
Chris@82 448 }
Chris@82 449 {
Chris@82 450 E T3a, T3d, T3b, T57, T39, T3c;
Chris@82 451 T3a = ri[WS(rs, 19)];
Chris@82 452 T3d = ii[WS(rs, 19)];
Chris@82 453 T39 = W[36];
Chris@82 454 T3b = T39 * T3a;
Chris@82 455 T57 = T39 * T3d;
Chris@82 456 T3c = W[37];
Chris@82 457 T3e = FMA(T3c, T3d, T3b);
Chris@82 458 T58 = FNMS(T3c, T3a, T57);
Chris@82 459 }
Chris@82 460 {
Chris@82 461 E T3h, T3k, T3i, T5d, T3g, T3j;
Chris@82 462 T3h = ri[WS(rs, 27)];
Chris@82 463 T3k = ii[WS(rs, 27)];
Chris@82 464 T3g = W[52];
Chris@82 465 T3i = T3g * T3h;
Chris@82 466 T5d = T3g * T3k;
Chris@82 467 T3j = W[53];
Chris@82 468 T3l = FMA(T3j, T3k, T3i);
Chris@82 469 T5e = FNMS(T3j, T3h, T5d);
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E T3f, T3s, T7c, T7d;
Chris@82 473 T3f = T38 + T3e;
Chris@82 474 T3s = T3l + T3r;
Chris@82 475 T3t = T3f + T3s;
Chris@82 476 T79 = T3s - T3f;
Chris@82 477 T7c = T56 + T58;
Chris@82 478 T7d = T5e + T5g;
Chris@82 479 T7e = T7c - T7d;
Chris@82 480 T7O = T7c + T7d;
Chris@82 481 }
Chris@82 482 {
Chris@82 483 E T59, T5a, T5c, T5h;
Chris@82 484 T59 = T56 - T58;
Chris@82 485 T5a = T38 - T3e;
Chris@82 486 T5b = T59 - T5a;
Chris@82 487 T5s = T5a + T59;
Chris@82 488 T5c = T3l - T3r;
Chris@82 489 T5h = T5e - T5g;
Chris@82 490 T5i = T5c + T5h;
Chris@82 491 T5t = T5c - T5h;
Chris@82 492 }
Chris@82 493 }
/* Load and twiddle inputs 5, 13, 21 and 29, then combine the four. */
Chris@82 494 {
Chris@82 495 E T2f, T4x, T2y, T4H, T2l, T4z, T2s, T4F;
Chris@82 496 {
Chris@82 497 E T2b, T2e, T2c, T4w, T2a, T2d;
Chris@82 498 T2b = ri[WS(rs, 5)];
Chris@82 499 T2e = ii[WS(rs, 5)];
Chris@82 500 T2a = W[8];
Chris@82 501 T2c = T2a * T2b;
Chris@82 502 T4w = T2a * T2e;
Chris@82 503 T2d = W[9];
Chris@82 504 T2f = FMA(T2d, T2e, T2c);
Chris@82 505 T4x = FNMS(T2d, T2b, T4w);
Chris@82 506 }
Chris@82 507 {
Chris@82 508 E T2u, T2x, T2v, T4G, T2t, T2w;
Chris@82 509 T2u = ri[WS(rs, 13)];
Chris@82 510 T2x = ii[WS(rs, 13)];
Chris@82 511 T2t = W[24];
Chris@82 512 T2v = T2t * T2u;
Chris@82 513 T4G = T2t * T2x;
Chris@82 514 T2w = W[25];
Chris@82 515 T2y = FMA(T2w, T2x, T2v);
Chris@82 516 T4H = FNMS(T2w, T2u, T4G);
Chris@82 517 }
Chris@82 518 {
Chris@82 519 E T2h, T2k, T2i, T4y, T2g, T2j;
Chris@82 520 T2h = ri[WS(rs, 21)];
Chris@82 521 T2k = ii[WS(rs, 21)];
Chris@82 522 T2g = W[40];
Chris@82 523 T2i = T2g * T2h;
Chris@82 524 T4y = T2g * T2k;
Chris@82 525 T2j = W[41];
Chris@82 526 T2l = FMA(T2j, T2k, T2i);
Chris@82 527 T4z = FNMS(T2j, T2h, T4y);
Chris@82 528 }
Chris@82 529 {
Chris@82 530 E T2o, T2r, T2p, T4E, T2n, T2q;
Chris@82 531 T2o = ri[WS(rs, 29)];
Chris@82 532 T2r = ii[WS(rs, 29)];
Chris@82 533 T2n = W[56];
Chris@82 534 T2p = T2n * T2o;
Chris@82 535 T4E = T2n * T2r;
Chris@82 536 T2q = W[57];
Chris@82 537 T2s = FMA(T2q, T2r, T2p);
Chris@82 538 T4F = FNMS(T2q, T2o, T4E);
Chris@82 539 }
Chris@82 540 {
Chris@82 541 E T2m, T2z, T71, T72;
Chris@82 542 T2m = T2f + T2l;
Chris@82 543 T2z = T2s + T2y;
Chris@82 544 T2A = T2m + T2z;
Chris@82 545 T6Y = T2z - T2m;
Chris@82 546 T71 = T4x + T4z;
Chris@82 547 T72 = T4F + T4H;
Chris@82 548 T73 = T71 - T72;
Chris@82 549 T7J = T71 + T72;
Chris@82 550 }
Chris@82 551 {
Chris@82 552 E T4A, T4B, T4D, T4I;
Chris@82 553 T4A = T4x - T4z;
Chris@82 554 T4B = T2f - T2l;
Chris@82 555 T4C = T4A - T4B;
Chris@82 556 T4T = T4B + T4A;
Chris@82 557 T4D = T2s - T2y;
Chris@82 558 T4I = T4F - T4H;
Chris@82 559 T4J = T4D + T4I;
Chris@82 560 T4U = T4D - T4I;
Chris@82 561 }
Chris@82 562 }
/* Store outputs 0, 16, 8, 24, 4, 20, 12 and 28 (indices divisible by 4). */
Chris@82 563 {
Chris@82 564 E TO, T7C, T7Z, T80, T89, T8e, T1H, T8d, T3v, T8b, T7L, T7T, T7Q, T7U, T7F;
Chris@82 565 E T81;
Chris@82 566 {
Chris@82 567 E Tm, TN, T7X, T7Y;
Chris@82 568 Tm = T8 + Tl;
Chris@82 569 TN = Tz + TM;
Chris@82 570 TO = Tm + TN;
Chris@82 571 T7C = Tm - TN;
Chris@82 572 T7X = T7I + T7J;
Chris@82 573 T7Y = T7N + T7O;
Chris@82 574 T7Z = T7X - T7Y;
Chris@82 575 T80 = T7X + T7Y;
Chris@82 576 }
Chris@82 577 {
Chris@82 578 E T82, T88, T1f, T1G;
Chris@82 579 T82 = T6F + T6G;
Chris@82 580 T88 = T83 + T87;
Chris@82 581 T89 = T82 + T88;
Chris@82 582 T8e = T88 - T82;
Chris@82 583 T1f = T11 + T1e;
Chris@82 584 T1G = T1s + T1F;
Chris@82 585 T1H = T1f + T1G;
Chris@82 586 T8d = T1G - T1f;
Chris@82 587 }
Chris@82 588 {
Chris@82 589 E T2B, T3u, T7H, T7K;
Chris@82 590 T2B = T29 + T2A;
Chris@82 591 T3u = T32 + T3t;
Chris@82 592 T3v = T2B + T3u;
Chris@82 593 T8b = T3u - T2B;
Chris@82 594 T7H = T29 - T2A;
Chris@82 595 T7K = T7I - T7J;
Chris@82 596 T7L = T7H + T7K;
Chris@82 597 T7T = T7K - T7H;
Chris@82 598 }
Chris@82 599 {
Chris@82 600 E T7M, T7P, T7D, T7E;
Chris@82 601 T7M = T32 - T3t;
Chris@82 602 T7P = T7N - T7O;
Chris@82 603 T7Q = T7M - T7P;
Chris@82 604 T7U = T7M + T7P;
Chris@82 605 T7D = T6J + T6K;
Chris@82 606 T7E = T6P + T6Q;
Chris@82 607 T7F = T7D - T7E;
Chris@82 608 T81 = T7D + T7E;
Chris@82 609 }
Chris@82 610 {
Chris@82 611 E T1I, T8a, T7W, T8c;
Chris@82 612 T1I = TO + T1H;
Chris@82 613 ri[WS(rs, 16)] = T1I - T3v;
Chris@82 614 ri[0] = T1I + T3v;
Chris@82 615 T8a = T81 + T89;
Chris@82 616 ii[0] = T80 + T8a;
Chris@82 617 ii[WS(rs, 16)] = T8a - T80;
Chris@82 618 T7W = TO - T1H;
Chris@82 619 ri[WS(rs, 24)] = T7W - T7Z;
Chris@82 620 ri[WS(rs, 8)] = T7W + T7Z;
Chris@82 621 T8c = T89 - T81;
Chris@82 622 ii[WS(rs, 8)] = T8b + T8c;
Chris@82 623 ii[WS(rs, 24)] = T8c - T8b;
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T7G, T7R, T8f, T8g;
Chris@82 627 T7G = T7C + T7F;
Chris@82 628 T7R = T7L + T7Q;
Chris@82 629 ri[WS(rs, 20)] = FNMS(KP707106781, T7R, T7G);
Chris@82 630 ri[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
Chris@82 631 T8f = T8d + T8e;
Chris@82 632 T8g = T7T + T7U;
Chris@82 633 ii[WS(rs, 4)] = FMA(KP707106781, T8g, T8f);
Chris@82 634 ii[WS(rs, 20)] = FNMS(KP707106781, T8g, T8f);
Chris@82 635 }
Chris@82 636 {
Chris@82 637 E T7S, T7V, T8h, T8i;
Chris@82 638 T7S = T7C - T7F;
Chris@82 639 T7V = T7T - T7U;
Chris@82 640 ri[WS(rs, 28)] = FNMS(KP707106781, T7V, T7S);
Chris@82 641 ri[WS(rs, 12)] = FMA(KP707106781, T7V, T7S);
Chris@82 642 T8h = T8e - T8d;
Chris@82 643 T8i = T7Q - T7L;
Chris@82 644 ii[WS(rs, 12)] = FMA(KP707106781, T8i, T8h);
Chris@82 645 ii[WS(rs, 28)] = FNMS(KP707106781, T8i, T8h);
Chris@82 646 }
Chris@82 647 }
/* Store outputs 22, 6, 14, 30, 18, 2, 26 and 10. */
Chris@82 648 {
Chris@82 649 E T6I, T7m, T7w, T7A, T8l, T8r, T6T, T8m, T75, T7j, T7p, T8s, T7t, T7z, T7g;
Chris@82 650 E T7k;
Chris@82 651 {
Chris@82 652 E T6E, T6H, T7u, T7v;
Chris@82 653 T6E = T8 - Tl;
Chris@82 654 T6H = T6F - T6G;
Chris@82 655 T6I = T6E - T6H;
Chris@82 656 T7m = T6E + T6H;
Chris@82 657 T7u = T7b + T7e;
Chris@82 658 T7v = T78 + T79;
Chris@82 659 T7w = FNMS(KP414213562, T7v, T7u);
Chris@82 660 T7A = FMA(KP414213562, T7u, T7v);
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T8j, T8k, T6N, T6S;
Chris@82 664 T8j = TM - Tz;
Chris@82 665 T8k = T87 - T83;
Chris@82 666 T8l = T8j + T8k;
Chris@82 667 T8r = T8k - T8j;
Chris@82 668 T6N = T6L - T6M;
Chris@82 669 T6S = T6O + T6R;
Chris@82 670 T6T = T6N - T6S;
Chris@82 671 T8m = T6N + T6S;
Chris@82 672 }
Chris@82 673 {
Chris@82 674 E T6Z, T74, T7n, T7o;
Chris@82 675 T6Z = T6X - T6Y;
Chris@82 676 T74 = T70 - T73;
Chris@82 677 T75 = FMA(KP414213562, T74, T6Z);
Chris@82 678 T7j = FNMS(KP414213562, T6Z, T74);
Chris@82 679 T7n = T6M + T6L;
Chris@82 680 T7o = T6O - T6R;
Chris@82 681 T7p = T7n + T7o;
Chris@82 682 T8s = T7o - T7n;
Chris@82 683 }
Chris@82 684 {
Chris@82 685 E T7r, T7s, T7a, T7f;
Chris@82 686 T7r = T70 + T73;
Chris@82 687 T7s = T6X + T6Y;
Chris@82 688 T7t = FMA(KP414213562, T7s, T7r);
Chris@82 689 T7z = FNMS(KP414213562, T7r, T7s);
Chris@82 690 T7a = T78 - T79;
Chris@82 691 T7f = T7b - T7e;
Chris@82 692 T7g = FNMS(KP414213562, T7f, T7a);
Chris@82 693 T7k = FMA(KP414213562, T7a, T7f);
Chris@82 694 }
Chris@82 695 {
Chris@82 696 E T6U, T7h, T8t, T8u;
Chris@82 697 T6U = FMA(KP707106781, T6T, T6I);
Chris@82 698 T7h = T75 - T7g;
Chris@82 699 ri[WS(rs, 22)] = FNMS(KP923879532, T7h, T6U);
Chris@82 700 ri[WS(rs, 6)] = FMA(KP923879532, T7h, T6U);
Chris@82 701 T8t = FMA(KP707106781, T8s, T8r);
Chris@82 702 T8u = T7k - T7j;
Chris@82 703 ii[WS(rs, 6)] = FMA(KP923879532, T8u, T8t);
Chris@82 704 ii[WS(rs, 22)] = FNMS(KP923879532, T8u, T8t);
Chris@82 705 }
Chris@82 706 {
Chris@82 707 E T7i, T7l, T8v, T8w;
Chris@82 708 T7i = FNMS(KP707106781, T6T, T6I);
Chris@82 709 T7l = T7j + T7k;
Chris@82 710 ri[WS(rs, 14)] = FNMS(KP923879532, T7l, T7i);
Chris@82 711 ri[WS(rs, 30)] = FMA(KP923879532, T7l, T7i);
Chris@82 712 T8v = FNMS(KP707106781, T8s, T8r);
Chris@82 713 T8w = T75 + T7g;
Chris@82 714 ii[WS(rs, 14)] = FNMS(KP923879532, T8w, T8v);
Chris@82 715 ii[WS(rs, 30)] = FMA(KP923879532, T8w, T8v);
Chris@82 716 }
Chris@82 717 {
Chris@82 718 E T7q, T7x, T8n, T8o;
Chris@82 719 T7q = FMA(KP707106781, T7p, T7m);
Chris@82 720 T7x = T7t + T7w;
Chris@82 721 ri[WS(rs, 18)] = FNMS(KP923879532, T7x, T7q);
Chris@82 722 ri[WS(rs, 2)] = FMA(KP923879532, T7x, T7q);
Chris@82 723 T8n = FMA(KP707106781, T8m, T8l);
Chris@82 724 T8o = T7z + T7A;
Chris@82 725 ii[WS(rs, 2)] = FMA(KP923879532, T8o, T8n);
Chris@82 726 ii[WS(rs, 18)] = FNMS(KP923879532, T8o, T8n);
Chris@82 727 }
Chris@82 728 {
Chris@82 729 E T7y, T7B, T8p, T8q;
Chris@82 730 T7y = FNMS(KP707106781, T7p, T7m);
Chris@82 731 T7B = T7z - T7A;
Chris@82 732 ri[WS(rs, 26)] = FNMS(KP923879532, T7B, T7y);
Chris@82 733 ri[WS(rs, 10)] = FMA(KP923879532, T7B, T7y);
Chris@82 734 T8p = FNMS(KP707106781, T8m, T8l);
Chris@82 735 T8q = T7w - T7t;
Chris@82 736 ii[WS(rs, 10)] = FMA(KP923879532, T8q, T8p);
Chris@82 737 ii[WS(rs, 26)] = FNMS(KP923879532, T8q, T8p);
Chris@82 738 }
Chris@82 739 }
/* Store outputs 21, 5, 13, 29, 17, 1, 25 and 9. */
Chris@82 740 {
Chris@82 741 E T3S, T5C, T4n, T8C, T8B, T8H, T5F, T8I, T5w, T5Q, T5A, T5M, T4X, T5P, T5z;
Chris@82 742 E T5J;
Chris@82 743 {
Chris@82 744 E T3C, T3R, T5D, T5E;
Chris@82 745 T3C = T3w + T3B;
Chris@82 746 T3R = T3J + T3Q;
Chris@82 747 T3S = FNMS(KP707106781, T3R, T3C);
Chris@82 748 T5C = FMA(KP707106781, T3R, T3C);
Chris@82 749 {
Chris@82 750 E T47, T4m, T8z, T8A;
Chris@82 751 T47 = FNMS(KP414213562, T46, T3Z);
Chris@82 752 T4m = FMA(KP414213562, T4l, T4e);
Chris@82 753 T4n = T47 - T4m;
Chris@82 754 T8C = T47 + T4m;
Chris@82 755 T8z = T8x - T8y;
Chris@82 756 T8A = T5T + T5U;
Chris@82 757 T8B = FMA(KP707106781, T8A, T8z);
Chris@82 758 T8H = FNMS(KP707106781, T8A, T8z);
Chris@82 759 }
Chris@82 760 T5D = FMA(KP414213562, T3Z, T46);
Chris@82 761 T5E = FNMS(KP414213562, T4e, T4l);
Chris@82 762 T5F = T5D + T5E;
Chris@82 763 T8I = T5E - T5D;
Chris@82 764 {
Chris@82 765 E T5k, T5L, T5v, T5K, T5j, T5u;
Chris@82 766 T5j = T5b + T5i;
Chris@82 767 T5k = FNMS(KP707106781, T5j, T54);
Chris@82 768 T5L = FMA(KP707106781, T5j, T54);
Chris@82 769 T5u = T5s + T5t;
Chris@82 770 T5v = FNMS(KP707106781, T5u, T5r);
Chris@82 771 T5K = FMA(KP707106781, T5u, T5r);
Chris@82 772 T5w = FNMS(KP668178637, T5v, T5k);
Chris@82 773 T5Q = FMA(KP198912367, T5K, T5L);
Chris@82 774 T5A = FMA(KP668178637, T5k, T5v);
Chris@82 775 T5M = FNMS(KP198912367, T5L, T5K);
Chris@82 776 }
Chris@82 777 {
Chris@82 778 E T4L, T5I, T4W, T5H, T4K, T4V;
Chris@82 779 T4K = T4C + T4J;
Chris@82 780 T4L = FNMS(KP707106781, T4K, T4v);
Chris@82 781 T5I = FMA(KP707106781, T4K, T4v);
Chris@82 782 T4V = T4T + T4U;
Chris@82 783 T4W = FNMS(KP707106781, T4V, T4S);
Chris@82 784 T5H = FMA(KP707106781, T4V, T4S);
Chris@82 785 T4X = FMA(KP668178637, T4W, T4L);
Chris@82 786 T5P = FNMS(KP198912367, T5H, T5I);
Chris@82 787 T5z = FNMS(KP668178637, T4L, T4W);
Chris@82 788 T5J = FMA(KP198912367, T5I, T5H);
Chris@82 789 }
Chris@82 790 }
Chris@82 791 {
Chris@82 792 E T4o, T5x, T8J, T8K;
Chris@82 793 T4o = FMA(KP923879532, T4n, T3S);
Chris@82 794 T5x = T4X - T5w;
Chris@82 795 ri[WS(rs, 21)] = FNMS(KP831469612, T5x, T4o);
Chris@82 796 ri[WS(rs, 5)] = FMA(KP831469612, T5x, T4o);
Chris@82 797 T8J = FMA(KP923879532, T8I, T8H);
Chris@82 798 T8K = T5A - T5z;
Chris@82 799 ii[WS(rs, 5)] = FMA(KP831469612, T8K, T8J);
Chris@82 800 ii[WS(rs, 21)] = FNMS(KP831469612, T8K, T8J);
Chris@82 801 }
Chris@82 802 {
Chris@82 803 E T5y, T5B, T8L, T8M;
Chris@82 804 T5y = FNMS(KP923879532, T4n, T3S);
Chris@82 805 T5B = T5z + T5A;
Chris@82 806 ri[WS(rs, 13)] = FNMS(KP831469612, T5B, T5y);
Chris@82 807 ri[WS(rs, 29)] = FMA(KP831469612, T5B, T5y);
Chris@82 808 T8L = FNMS(KP923879532, T8I, T8H);
Chris@82 809 T8M = T4X + T5w;
Chris@82 810 ii[WS(rs, 13)] = FNMS(KP831469612, T8M, T8L);
Chris@82 811 ii[WS(rs, 29)] = FMA(KP831469612, T8M, T8L);
Chris@82 812 }
Chris@82 813 {
Chris@82 814 E T5G, T5N, T8D, T8E;
Chris@82 815 T5G = FMA(KP923879532, T5F, T5C);
Chris@82 816 T5N = T5J + T5M;
Chris@82 817 ri[WS(rs, 17)] = FNMS(KP980785280, T5N, T5G);
Chris@82 818 ri[WS(rs, 1)] = FMA(KP980785280, T5N, T5G);
Chris@82 819 T8D = FMA(KP923879532, T8C, T8B);
Chris@82 820 T8E = T5P + T5Q;
Chris@82 821 ii[WS(rs, 1)] = FMA(KP980785280, T8E, T8D);
Chris@82 822 ii[WS(rs, 17)] = FNMS(KP980785280, T8E, T8D);
Chris@82 823 }
Chris@82 824 {
Chris@82 825 E T5O, T5R, T8F, T8G;
Chris@82 826 T5O = FNMS(KP923879532, T5F, T5C);
Chris@82 827 T5R = T5P - T5Q;
Chris@82 828 ri[WS(rs, 25)] = FNMS(KP980785280, T5R, T5O);
Chris@82 829 ri[WS(rs, 9)] = FMA(KP980785280, T5R, T5O);
Chris@82 830 T8F = FNMS(KP923879532, T8C, T8B);
Chris@82 831 T8G = T5M - T5J;
Chris@82 832 ii[WS(rs, 9)] = FMA(KP980785280, T8G, T8F);
Chris@82 833 ii[WS(rs, 25)] = FNMS(KP980785280, T8G, T8F);
Chris@82 834 }
Chris@82 835 }
/* Store outputs 19, 3, 27, 11, 23, 7, 15 and 31. */
Chris@82 836 {
Chris@82 837 E T5W, T6o, T63, T8W, T8P, T8V, T6r, T8Q, T6i, T6C, T6m, T6y, T6b, T6B, T6l;
Chris@82 838 E T6v;
Chris@82 839 {
Chris@82 840 E T5S, T5V, T6p, T6q;
Chris@82 841 T5S = T3w - T3B;
Chris@82 842 T5V = T5T - T5U;
Chris@82 843 T5W = FMA(KP707106781, T5V, T5S);
Chris@82 844 T6o = FNMS(KP707106781, T5V, T5S);
Chris@82 845 {
Chris@82 846 E T5Z, T62, T8N, T8O;
Chris@82 847 T5Z = FMA(KP414213562, T5Y, T5X);
Chris@82 848 T62 = FNMS(KP414213562, T61, T60);
Chris@82 849 T63 = T5Z - T62;
Chris@82 850 T8W = T5Z + T62;
Chris@82 851 T8N = T8y + T8x;
Chris@82 852 T8O = T3Q - T3J;
Chris@82 853 T8P = FMA(KP707106781, T8O, T8N);
Chris@82 854 T8V = FNMS(KP707106781, T8O, T8N);
Chris@82 855 }
Chris@82 856 T6p = FNMS(KP414213562, T5X, T5Y);
Chris@82 857 T6q = FMA(KP414213562, T60, T61);
Chris@82 858 T6r = T6p + T6q;
Chris@82 859 T8Q = T6q - T6p;
Chris@82 860 {
Chris@82 861 E T6e, T6x, T6h, T6w, T6d, T6g;
Chris@82 862 T6d = T5i - T5b;
Chris@82 863 T6e = FNMS(KP707106781, T6d, T6c);
Chris@82 864 T6x = FMA(KP707106781, T6d, T6c);
Chris@82 865 T6g = T5s - T5t;
Chris@82 866 T6h = FNMS(KP707106781, T6g, T6f);
Chris@82 867 T6w = FMA(KP707106781, T6g, T6f);
Chris@82 868 T6i = FNMS(KP668178637, T6h, T6e);
Chris@82 869 T6C = FMA(KP198912367, T6w, T6x);
Chris@82 870 T6m = FMA(KP668178637, T6e, T6h);
Chris@82 871 T6y = FNMS(KP198912367, T6x, T6w);
Chris@82 872 }
Chris@82 873 {
Chris@82 874 E T67, T6u, T6a, T6t, T66, T69;
Chris@82 875 T66 = T4J - T4C;
Chris@82 876 T67 = FNMS(KP707106781, T66, T65);
Chris@82 877 T6u = FMA(KP707106781, T66, T65);
Chris@82 878 T69 = T4T - T4U;
Chris@82 879 T6a = FNMS(KP707106781, T69, T68);
Chris@82 880 T6t = FMA(KP707106781, T69, T68);
Chris@82 881 T6b = FMA(KP668178637, T6a, T67);
Chris@82 882 T6B = FNMS(KP198912367, T6t, T6u);
Chris@82 883 T6l = FNMS(KP668178637, T67, T6a);
Chris@82 884 T6v = FMA(KP198912367, T6u, T6t);
Chris@82 885 }
Chris@82 886 }
Chris@82 887 {
Chris@82 888 E T64, T6j, T8R, T8S;
Chris@82 889 T64 = FMA(KP923879532, T63, T5W);
Chris@82 890 T6j = T6b + T6i;
Chris@82 891 ri[WS(rs, 19)] = FNMS(KP831469612, T6j, T64);
Chris@82 892 ri[WS(rs, 3)] = FMA(KP831469612, T6j, T64);
Chris@82 893 T8R = FMA(KP923879532, T8Q, T8P);
Chris@82 894 T8S = T6l + T6m;
Chris@82 895 ii[WS(rs, 3)] = FMA(KP831469612, T8S, T8R);
Chris@82 896 ii[WS(rs, 19)] = FNMS(KP831469612, T8S, T8R);
Chris@82 897 }
Chris@82 898 {
Chris@82 899 E T6k, T6n, T8T, T8U;
Chris@82 900 T6k = FNMS(KP923879532, T63, T5W);
Chris@82 901 T6n = T6l - T6m;
Chris@82 902 ri[WS(rs, 27)] = FNMS(KP831469612, T6n, T6k);
Chris@82 903 ri[WS(rs, 11)] = FMA(KP831469612, T6n, T6k);
Chris@82 904 T8T = FNMS(KP923879532, T8Q, T8P);
Chris@82 905 T8U = T6i - T6b;
Chris@82 906 ii[WS(rs, 11)] = FMA(KP831469612, T8U, T8T);
Chris@82 907 ii[WS(rs, 27)] = FNMS(KP831469612, T8U, T8T);
Chris@82 908 }
Chris@82 909 {
Chris@82 910 E T6s, T6z, T8X, T8Y;
Chris@82 911 T6s = FNMS(KP923879532, T6r, T6o);
Chris@82 912 T6z = T6v - T6y;
Chris@82 913 ri[WS(rs, 23)] = FNMS(KP980785280, T6z, T6s);
Chris@82 914 ri[WS(rs, 7)] = FMA(KP980785280, T6z, T6s);
Chris@82 915 T8X = FNMS(KP923879532, T8W, T8V);
Chris@82 916 T8Y = T6C - T6B;
Chris@82 917 ii[WS(rs, 7)] = FMA(KP980785280, T8Y, T8X);
Chris@82 918 ii[WS(rs, 23)] = FNMS(KP980785280, T8Y, T8X);
Chris@82 919 }
Chris@82 920 {
Chris@82 921 E T6A, T6D, T8Z, T90;
Chris@82 922 T6A = FMA(KP923879532, T6r, T6o);
Chris@82 923 T6D = T6B + T6C;
Chris@82 924 ri[WS(rs, 15)] = FNMS(KP980785280, T6D, T6A);
Chris@82 925 ri[WS(rs, 31)] = FMA(KP980785280, T6D, T6A);
Chris@82 926 T8Z = FMA(KP923879532, T8W, T8V);
Chris@82 927 T90 = T6v + T6y;
Chris@82 928 ii[WS(rs, 15)] = FNMS(KP980785280, T90, T8Z);
Chris@82 929 ii[WS(rs, 31)] = FMA(KP980785280, T90, T8Z);
Chris@82 930 }
Chris@82 931 }
Chris@82 932 }
Chris@82 933 }
Chris@82 934 }
Chris@82 935
/*
 * Twiddle-instruction table referenced by the codelet descriptor: one
 * TW_FULL entry with arguments (0, 32) followed by a TW_NEXT entry (1, 0).
 * NOTE(review): the field meanings come from the tw_instr definition, which
 * is not visible in this file; presumably this asks for the full twiddle set
 * of a size-32 transform and then ends the list -- confirm.
 */
Chris@82 936 static const tw_instr twinstr[] = {
Chris@82 937 {TW_FULL, 0, 32},
Chris@82 938 {TW_NEXT, 1, 0}
Chris@82 939 };
Chris@82 940
/*
 * Codelet descriptor passed to the planner at registration: 32 (presumably
 * the radix -- confirm against ct_desc), the name "t1_32", the twinstr table,
 * the GENUS object, and the operation counts {236, 62, 198, 0}.  The first
 * three counts match the "236 additions, 62 multiplications, 198 fused
 * multiply/add" figures in the generator comment for this FMA variant.
 * The meaning of the three trailing zero fields is not visible from here.
 */
Chris@82 941 static const ct_desc desc = { 32, "t1_32", twinstr, &GENUS, {236, 62, 198, 0}, 0, 0, 0 };
Chris@82 942
/*
 * Registration entry point (FMA variant): hands the t1_32 kernel and its
 * descriptor to planner `p` through kdft_dit_register.
 * X(...) is a project macro wrapping the identifier -- presumably a
 * precision-dependent name prefix; confirm in the project headers.
 */
Chris@82 943 void X(codelet_t1_32) (planner *p) {
Chris@82 944 X(kdft_dit_register) (p, t1_32, &desc);
Chris@82 945 }
Chris@82 946 #else
Chris@82 947
Chris@82 948 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 32 -name t1_32 -include dft/scalar/t.h */
Chris@82 949
Chris@82 950 /*
Chris@82 951 * This function contains 434 FP additions, 208 FP multiplications,
Chris@82 952 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@82 953 * 96 stack variables, 7 constants, and 128 memory accesses
Chris@82 954 */
Chris@82 955 #include "dft/scalar/t.h"
Chris@82 956
/*
 * t1_32 (non-FMA variant): in-place 32-point twiddle codelet.
 *
 * Parameters, as used by the code below:
 *   ri, ii - real and imaginary data; element k of the current transform is
 *            accessed as ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..31 and is
 *            overwritten with the result at the same locations.
 *   W      - twiddle table; 62 values are consumed per iteration
 *            (W[0]..W[61]); the pointer is pre-advanced by mb * 62.
 *   rs     - stride passed to WS() to address the 32 elements.
 *   mb, me - half-open iteration range: the loop runs for m = mb .. me-1.
 *   ms     - amount added to ri and ii after every iteration.
 *
 * Each iteration first multiplies input k (k = 1..31) by the twiddle pair
 * (W[2k-2], W[2k-1]); input 0 is used unmodified. Assuming FMA(a,b,c) is
 * a*b + c and FNMS(a,b,c) is c - a*b (macros defined outside this file --
 * TODO confirm), the product formed is
 *     re' = W[2k-2]*re + W[2k-1]*im,   im' = W[2k-2]*im - W[2k-1]*re,
 * i.e. multiplication by the complex conjugate of W[2k-2] + i*W[2k-1].
 * The twiddled values are then combined by a fixed network of additions,
 * subtractions and multiplications by the constants declared below, and
 * the 32 outputs are stored back into ri / ii.
 *
 * E, R, INT, stride, WS, DK and MAKE_VOLATILE_STRIDE come from the included
 * project headers and are not visible here.
 */
Chris@82 957 static void t1_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 958 {
/*
 * Trigonometric constants (the name encodes the leading digits):
 *   KP195090322 = sin(pi/16)    KP980785280 = cos(pi/16)
 *   KP555570233 = sin(3*pi/16)  KP831469612 = cos(3*pi/16)
 *   KP382683432 = sin(pi/8)     KP923879532 = cos(pi/8)
 *   KP707106781 = sqrt(1/2) = cos(pi/4)
 */
Chris@82 959 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 960 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 961 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 962 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 963 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 964 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 965 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 966 {
Chris@82 967 INT m;
/*
 * One 32-point transform per iteration; ri/ii advance by ms and W by 62.
 * NOTE(review): MAKE_VOLATILE_STRIDE(64, rs) is an opaque macro evaluated in
 * the loop increment -- presumably a compiler hint about rs; confirm.
 */
Chris@82 968 for (m = mb, W = W + (mb * 62); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 969 E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41;
Chris@82 970 E T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U;
Chris@82 971 E T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x;
Chris@82 972 E T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P;
Chris@82 973 E T4m, T5h, T4v, T5e;
/* Load inputs 0, 16, 8, 24 (0 untwiddled) and form their sums/differences. */
Chris@82 974 {
Chris@82 975 E T1, T76, T6, T75, Tc, T32, Th, T33;
Chris@82 976 T1 = ri[0];
Chris@82 977 T76 = ii[0];
Chris@82 978 {
Chris@82 979 E T3, T5, T2, T4;
Chris@82 980 T3 = ri[WS(rs, 16)];
Chris@82 981 T5 = ii[WS(rs, 16)];
Chris@82 982 T2 = W[30];
Chris@82 983 T4 = W[31];
Chris@82 984 T6 = FMA(T2, T3, T4 * T5);
Chris@82 985 T75 = FNMS(T4, T3, T2 * T5);
Chris@82 986 }
Chris@82 987 {
Chris@82 988 E T9, Tb, T8, Ta;
Chris@82 989 T9 = ri[WS(rs, 8)];
Chris@82 990 Tb = ii[WS(rs, 8)];
Chris@82 991 T8 = W[14];
Chris@82 992 Ta = W[15];
Chris@82 993 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 994 T32 = FNMS(Ta, T9, T8 * Tb);
Chris@82 995 }
Chris@82 996 {
Chris@82 997 E Te, Tg, Td, Tf;
Chris@82 998 Te = ri[WS(rs, 24)];
Chris@82 999 Tg = ii[WS(rs, 24)];
Chris@82 1000 Td = W[46];
Chris@82 1001 Tf = W[47];
Chris@82 1002 Th = FMA(Td, Te, Tf * Tg);
Chris@82 1003 T33 = FNMS(Tf, Te, Td * Tg);
Chris@82 1004 }
Chris@82 1005 {
Chris@82 1006 E T7, Ti, T7A, T7B;
Chris@82 1007 T7 = T1 + T6;
Chris@82 1008 Ti = Tc + Th;
Chris@82 1009 Tj = T7 + Ti;
Chris@82 1010 T5F = T7 - Ti;
Chris@82 1011 T7A = T76 - T75;
Chris@82 1012 T7B = Tc - Th;
Chris@82 1013 T7C = T7A - T7B;
Chris@82 1014 T7Q = T7B + T7A;
Chris@82 1015 }
Chris@82 1016 {
Chris@82 1017 E T31, T34, T74, T77;
Chris@82 1018 T31 = T1 - T6;
Chris@82 1019 T34 = T32 - T33;
Chris@82 1020 T35 = T31 - T34;
Chris@82 1021 T4T = T31 + T34;
Chris@82 1022 T74 = T32 + T33;
Chris@82 1023 T77 = T75 + T76;
Chris@82 1024 T78 = T74 + T77;
Chris@82 1025 T7m = T77 - T74;
Chris@82 1026 }
Chris@82 1027 }
/* Twiddle inputs 1, 25, 17, 9 and form their sums/differences. */
Chris@82 1028 {
Chris@82 1029 E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y;
Chris@82 1030 {
Chris@82 1031 E T1v, T1x, T1u, T1w;
Chris@82 1032 T1v = ri[WS(rs, 1)];
Chris@82 1033 T1x = ii[WS(rs, 1)];
Chris@82 1034 T1u = W[0];
Chris@82 1035 T1w = W[1];
Chris@82 1036 T1y = FMA(T1u, T1v, T1w * T1x);
Chris@82 1037 T3G = FNMS(T1w, T1v, T1u * T1x);
Chris@82 1038 }
Chris@82 1039 {
Chris@82 1040 E T1L, T1N, T1K, T1M;
Chris@82 1041 T1L = ri[WS(rs, 25)];
Chris@82 1042 T1N = ii[WS(rs, 25)];
Chris@82 1043 T1K = W[48];
Chris@82 1044 T1M = W[49];
Chris@82 1045 T1O = FMA(T1K, T1L, T1M * T1N);
Chris@82 1046 T3Z = FNMS(T1M, T1L, T1K * T1N);
Chris@82 1047 }
Chris@82 1048 {
Chris@82 1049 E T1A, T1C, T1z, T1B;
Chris@82 1050 T1A = ri[WS(rs, 17)];
Chris@82 1051 T1C = ii[WS(rs, 17)];
Chris@82 1052 T1z = W[32];
Chris@82 1053 T1B = W[33];
Chris@82 1054 T1D = FMA(T1z, T1A, T1B * T1C);
Chris@82 1055 T3H = FNMS(T1B, T1A, T1z * T1C);
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T1G, T1I, T1F, T1H;
Chris@82 1059 T1G = ri[WS(rs, 9)];
Chris@82 1060 T1I = ii[WS(rs, 9)];
Chris@82 1061 T1F = W[16];
Chris@82 1062 T1H = W[17];
Chris@82 1063 T1J = FMA(T1F, T1G, T1H * T1I);
Chris@82 1064 T3Y = FNMS(T1H, T1G, T1F * T1I);
Chris@82 1065 }
Chris@82 1066 {
Chris@82 1067 E T1E, T1P, T5W, T5X;
Chris@82 1068 T1E = T1y + T1D;
Chris@82 1069 T1P = T1J + T1O;
Chris@82 1070 T1Q = T1E + T1P;
Chris@82 1071 T61 = T1E - T1P;
Chris@82 1072 T5W = T3G + T3H;
Chris@82 1073 T5X = T3Y + T3Z;
Chris@82 1074 T5Y = T5W - T5X;
Chris@82 1075 T6J = T5W + T5X;
Chris@82 1076 }
Chris@82 1077 {
Chris@82 1078 E T3I, T3J, T3X, T40;
Chris@82 1079 T3I = T3G - T3H;
Chris@82 1080 T3J = T1J - T1O;
Chris@82 1081 T3K = T3I + T3J;
Chris@82 1082 T59 = T3I - T3J;
Chris@82 1083 T3X = T1y - T1D;
Chris@82 1084 T40 = T3Y - T3Z;
Chris@82 1085 T41 = T3X - T40;
Chris@82 1086 T56 = T3X + T40;
Chris@82 1087 }
Chris@82 1088 }
/* Twiddle inputs 31, 23, 15, 7 and form their sums/differences. */
Chris@82 1089 {
Chris@82 1090 E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48;
Chris@82 1091 {
Chris@82 1092 E T2g, T2i, T2f, T2h;
Chris@82 1093 T2g = ri[WS(rs, 31)];
Chris@82 1094 T2i = ii[WS(rs, 31)];
Chris@82 1095 T2f = W[60];
Chris@82 1096 T2h = W[61];
Chris@82 1097 T2j = FMA(T2f, T2g, T2h * T2i);
Chris@82 1098 T4o = FNMS(T2h, T2g, T2f * T2i);
Chris@82 1099 }
Chris@82 1100 {
Chris@82 1101 E T2w, T2y, T2v, T2x;
Chris@82 1102 T2w = ri[WS(rs, 23)];
Chris@82 1103 T2y = ii[WS(rs, 23)];
Chris@82 1104 T2v = W[44];
Chris@82 1105 T2x = W[45];
Chris@82 1106 T2z = FMA(T2v, T2w, T2x * T2y);
Chris@82 1107 T49 = FNMS(T2x, T2w, T2v * T2y);
Chris@82 1108 }
Chris@82 1109 {
Chris@82 1110 E T2l, T2n, T2k, T2m;
Chris@82 1111 T2l = ri[WS(rs, 15)];
Chris@82 1112 T2n = ii[WS(rs, 15)];
Chris@82 1113 T2k = W[28];
Chris@82 1114 T2m = W[29];
Chris@82 1115 T2o = FMA(T2k, T2l, T2m * T2n);
Chris@82 1116 T4p = FNMS(T2m, T2l, T2k * T2n);
Chris@82 1117 }
Chris@82 1118 {
Chris@82 1119 E T2r, T2t, T2q, T2s;
Chris@82 1120 T2r = ri[WS(rs, 7)];
Chris@82 1121 T2t = ii[WS(rs, 7)];
Chris@82 1122 T2q = W[12];
Chris@82 1123 T2s = W[13];
Chris@82 1124 T2u = FMA(T2q, T2r, T2s * T2t);
Chris@82 1125 T48 = FNMS(T2s, T2r, T2q * T2t);
Chris@82 1126 }
Chris@82 1127 {
Chris@82 1128 E T2p, T2A, T6c, T6d;
Chris@82 1129 T2p = T2j + T2o;
Chris@82 1130 T2A = T2u + T2z;
Chris@82 1131 T2B = T2p + T2A;
Chris@82 1132 T67 = T2p - T2A;
Chris@82 1133 T6c = T4o + T4p;
Chris@82 1134 T6d = T48 + T49;
Chris@82 1135 T6e = T6c - T6d;
Chris@82 1136 T6O = T6c + T6d;
Chris@82 1137 }
Chris@82 1138 {
Chris@82 1139 E T47, T4a, T4q, T4r;
Chris@82 1140 T47 = T2j - T2o;
Chris@82 1141 T4a = T48 - T49;
Chris@82 1142 T4b = T47 - T4a;
Chris@82 1143 T5d = T47 + T4a;
Chris@82 1144 T4q = T4o - T4p;
Chris@82 1145 T4r = T2u - T2z;
Chris@82 1146 T4s = T4q + T4r;
Chris@82 1147 T5g = T4q - T4r;
Chris@82 1148 }
Chris@82 1149 }
/* Twiddle inputs 4, 12, 20, 28 and form their sums/differences. */
Chris@82 1150 {
Chris@82 1151 E To, T36, TE, T3d, Tt, T37, Tz, T3c;
Chris@82 1152 {
Chris@82 1153 E Tl, Tn, Tk, Tm;
Chris@82 1154 Tl = ri[WS(rs, 4)];
Chris@82 1155 Tn = ii[WS(rs, 4)];
Chris@82 1156 Tk = W[6];
Chris@82 1157 Tm = W[7];
Chris@82 1158 To = FMA(Tk, Tl, Tm * Tn);
Chris@82 1159 T36 = FNMS(Tm, Tl, Tk * Tn);
Chris@82 1160 }
Chris@82 1161 {
Chris@82 1162 E TB, TD, TA, TC;
Chris@82 1163 TB = ri[WS(rs, 12)];
Chris@82 1164 TD = ii[WS(rs, 12)];
Chris@82 1165 TA = W[22];
Chris@82 1166 TC = W[23];
Chris@82 1167 TE = FMA(TA, TB, TC * TD);
Chris@82 1168 T3d = FNMS(TC, TB, TA * TD);
Chris@82 1169 }
Chris@82 1170 {
Chris@82 1171 E Tq, Ts, Tp, Tr;
Chris@82 1172 Tq = ri[WS(rs, 20)];
Chris@82 1173 Ts = ii[WS(rs, 20)];
Chris@82 1174 Tp = W[38];
Chris@82 1175 Tr = W[39];
Chris@82 1176 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 1177 T37 = FNMS(Tr, Tq, Tp * Ts);
Chris@82 1178 }
Chris@82 1179 {
Chris@82 1180 E Tw, Ty, Tv, Tx;
Chris@82 1181 Tw = ri[WS(rs, 28)];
Chris@82 1182 Ty = ii[WS(rs, 28)];
Chris@82 1183 Tv = W[54];
Chris@82 1184 Tx = W[55];
Chris@82 1185 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 1186 T3c = FNMS(Tx, Tw, Tv * Ty);
Chris@82 1187 }
Chris@82 1188 {
Chris@82 1189 E Tu, TF, T5G, T5H;
Chris@82 1190 Tu = To + Tt;
Chris@82 1191 TF = Tz + TE;
Chris@82 1192 TG = Tu + TF;
Chris@82 1193 T7l = TF - Tu;
Chris@82 1194 T5G = T36 + T37;
Chris@82 1195 T5H = T3c + T3d;
Chris@82 1196 T5I = T5G - T5H;
Chris@82 1197 T73 = T5G + T5H;
Chris@82 1198 }
Chris@82 1199 {
Chris@82 1200 E T38, T39, T3b, T3e;
Chris@82 1201 T38 = T36 - T37;
Chris@82 1202 T39 = To - Tt;
Chris@82 1203 T3a = T38 - T39;
Chris@82 1204 T4U = T39 + T38;
Chris@82 1205 T3b = Tz - TE;
Chris@82 1206 T3e = T3c - T3d;
Chris@82 1207 T3f = T3b + T3e;
Chris@82 1208 T4V = T3b - T3e;
Chris@82 1209 }
Chris@82 1210 }
/* Twiddle inputs 2, 26, 18, 10 and form their sums/differences. */
Chris@82 1211 {
Chris@82 1212 E TM, T3i, T12, T3p, TR, T3j, TX, T3o;
Chris@82 1213 {
Chris@82 1214 E TJ, TL, TI, TK;
Chris@82 1215 TJ = ri[WS(rs, 2)];
Chris@82 1216 TL = ii[WS(rs, 2)];
Chris@82 1217 TI = W[2];
Chris@82 1218 TK = W[3];
Chris@82 1219 TM = FMA(TI, TJ, TK * TL);
Chris@82 1220 T3i = FNMS(TK, TJ, TI * TL);
Chris@82 1221 }
Chris@82 1222 {
Chris@82 1223 E TZ, T11, TY, T10;
Chris@82 1224 TZ = ri[WS(rs, 26)];
Chris@82 1225 T11 = ii[WS(rs, 26)];
Chris@82 1226 TY = W[50];
Chris@82 1227 T10 = W[51];
Chris@82 1228 T12 = FMA(TY, TZ, T10 * T11);
Chris@82 1229 T3p = FNMS(T10, TZ, TY * T11);
Chris@82 1230 }
Chris@82 1231 {
Chris@82 1232 E TO, TQ, TN, TP;
Chris@82 1233 TO = ri[WS(rs, 18)];
Chris@82 1234 TQ = ii[WS(rs, 18)];
Chris@82 1235 TN = W[34];
Chris@82 1236 TP = W[35];
Chris@82 1237 TR = FMA(TN, TO, TP * TQ);
Chris@82 1238 T3j = FNMS(TP, TO, TN * TQ);
Chris@82 1239 }
Chris@82 1240 {
Chris@82 1241 E TU, TW, TT, TV;
Chris@82 1242 TU = ri[WS(rs, 10)];
Chris@82 1243 TW = ii[WS(rs, 10)];
Chris@82 1244 TT = W[18];
Chris@82 1245 TV = W[19];
Chris@82 1246 TX = FMA(TT, TU, TV * TW);
Chris@82 1247 T3o = FNMS(TV, TU, TT * TW);
Chris@82 1248 }
Chris@82 1249 {
Chris@82 1250 E TS, T13, T5K, T5L;
Chris@82 1251 TS = TM + TR;
Chris@82 1252 T13 = TX + T12;
Chris@82 1253 T14 = TS + T13;
Chris@82 1254 T5N = TS - T13;
Chris@82 1255 T5K = T3i + T3j;
Chris@82 1256 T5L = T3o + T3p;
Chris@82 1257 T5M = T5K - T5L;
Chris@82 1258 T6E = T5K + T5L;
Chris@82 1259 }
Chris@82 1260 {
Chris@82 1261 E T3k, T3l, T3n, T3q;
Chris@82 1262 T3k = T3i - T3j;
Chris@82 1263 T3l = TX - T12;
Chris@82 1264 T3m = T3k + T3l;
Chris@82 1265 T4Y = T3k - T3l;
Chris@82 1266 T3n = TM - TR;
Chris@82 1267 T3q = T3o - T3p;
Chris@82 1268 T3r = T3n - T3q;
Chris@82 1269 T4Z = T3n + T3q;
Chris@82 1270 }
Chris@82 1271 }
/* Twiddle inputs 30, 22, 14, 6 and form their sums/differences. */
Chris@82 1272 {
Chris@82 1273 E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
Chris@82 1274 {
Chris@82 1275 E T16, T18, T15, T17;
Chris@82 1276 T16 = ri[WS(rs, 30)];
Chris@82 1277 T18 = ii[WS(rs, 30)];
Chris@82 1278 T15 = W[58];
Chris@82 1279 T17 = W[59];
Chris@82 1280 T19 = FMA(T15, T16, T17 * T18);
Chris@82 1281 T3t = FNMS(T17, T16, T15 * T18);
Chris@82 1282 }
Chris@82 1283 {
Chris@82 1284 E T1m, T1o, T1l, T1n;
Chris@82 1285 T1m = ri[WS(rs, 22)];
Chris@82 1286 T1o = ii[WS(rs, 22)];
Chris@82 1287 T1l = W[42];
Chris@82 1288 T1n = W[43];
Chris@82 1289 T1p = FMA(T1l, T1m, T1n * T1o);
Chris@82 1290 T3A = FNMS(T1n, T1m, T1l * T1o);
Chris@82 1291 }
Chris@82 1292 {
Chris@82 1293 E T1b, T1d, T1a, T1c;
Chris@82 1294 T1b = ri[WS(rs, 14)];
Chris@82 1295 T1d = ii[WS(rs, 14)];
Chris@82 1296 T1a = W[26];
Chris@82 1297 T1c = W[27];
Chris@82 1298 T1e = FMA(T1a, T1b, T1c * T1d);
Chris@82 1299 T3u = FNMS(T1c, T1b, T1a * T1d);
Chris@82 1300 }
Chris@82 1301 {
Chris@82 1302 E T1h, T1j, T1g, T1i;
Chris@82 1303 T1h = ri[WS(rs, 6)];
Chris@82 1304 T1j = ii[WS(rs, 6)];
Chris@82 1305 T1g = W[10];
Chris@82 1306 T1i = W[11];
Chris@82 1307 T1k = FMA(T1g, T1h, T1i * T1j);
Chris@82 1308 T3z = FNMS(T1i, T1h, T1g * T1j);
Chris@82 1309 }
Chris@82 1310 {
Chris@82 1311 E T1f, T1q, T5Q, T5R;
Chris@82 1312 T1f = T19 + T1e;
Chris@82 1313 T1q = T1k + T1p;
Chris@82 1314 T1r = T1f + T1q;
Chris@82 1315 T5P = T1f - T1q;
Chris@82 1316 T5Q = T3t + T3u;
Chris@82 1317 T5R = T3z + T3A;
Chris@82 1318 T5S = T5Q - T5R;
Chris@82 1319 T6F = T5Q + T5R;
Chris@82 1320 }
Chris@82 1321 {
Chris@82 1322 E T3v, T3w, T3y, T3B;
Chris@82 1323 T3v = T3t - T3u;
Chris@82 1324 T3w = T1k - T1p;
Chris@82 1325 T3x = T3v + T3w;
Chris@82 1326 T51 = T3v - T3w;
Chris@82 1327 T3y = T19 - T1e;
Chris@82 1328 T3B = T3z - T3A;
Chris@82 1329 T3C = T3y - T3B;
Chris@82 1330 T52 = T3y + T3B;
Chris@82 1331 }
Chris@82 1332 }
/* Twiddle inputs 5, 21, 29, 13; several combinations are pre-scaled by sqrt(1/2). */
Chris@82 1333 {
Chris@82 1334 E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O;
Chris@82 1335 {
Chris@82 1336 E T1S, T1U, T1R, T1T;
Chris@82 1337 T1S = ri[WS(rs, 5)];
Chris@82 1338 T1U = ii[WS(rs, 5)];
Chris@82 1339 T1R = W[8];
Chris@82 1340 T1T = W[9];
Chris@82 1341 T1V = FMA(T1R, T1S, T1T * T1U);
Chris@82 1342 T3R = FNMS(T1T, T1S, T1R * T1U);
Chris@82 1343 }
Chris@82 1344 {
Chris@82 1345 E T1X, T1Z, T1W, T1Y;
Chris@82 1346 T1X = ri[WS(rs, 21)];
Chris@82 1347 T1Z = ii[WS(rs, 21)];
Chris@82 1348 T1W = W[40];
Chris@82 1349 T1Y = W[41];
Chris@82 1350 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@82 1351 T3S = FNMS(T1Y, T1X, T1W * T1Z);
Chris@82 1352 }
Chris@82 1353 T3Q = T1V - T20;
Chris@82 1354 T3T = T3R - T3S;
Chris@82 1355 {
Chris@82 1356 E T23, T25, T22, T24;
Chris@82 1357 T23 = ri[WS(rs, 29)];
Chris@82 1358 T25 = ii[WS(rs, 29)];
Chris@82 1359 T22 = W[56];
Chris@82 1360 T24 = W[57];
Chris@82 1361 T26 = FMA(T22, T23, T24 * T25);
Chris@82 1362 T3M = FNMS(T24, T23, T22 * T25);
Chris@82 1363 }
Chris@82 1364 {
Chris@82 1365 E T28, T2a, T27, T29;
Chris@82 1366 T28 = ri[WS(rs, 13)];
Chris@82 1367 T2a = ii[WS(rs, 13)];
Chris@82 1368 T27 = W[24];
Chris@82 1369 T29 = W[25];
Chris@82 1370 T2b = FMA(T27, T28, T29 * T2a);
Chris@82 1371 T3N = FNMS(T29, T28, T27 * T2a);
Chris@82 1372 }
Chris@82 1373 T3L = T26 - T2b;
Chris@82 1374 T3O = T3M - T3N;
Chris@82 1375 {
Chris@82 1376 E T21, T2c, T62, T63;
Chris@82 1377 T21 = T1V + T20;
Chris@82 1378 T2c = T26 + T2b;
Chris@82 1379 T2d = T21 + T2c;
Chris@82 1380 T5Z = T2c - T21;
Chris@82 1381 T62 = T3R + T3S;
Chris@82 1382 T63 = T3M + T3N;
Chris@82 1383 T64 = T62 - T63;
Chris@82 1384 T6K = T62 + T63;
Chris@82 1385 }
Chris@82 1386 {
Chris@82 1387 E T3P, T3U, T42, T43;
Chris@82 1388 T3P = T3L - T3O;
Chris@82 1389 T3U = T3Q + T3T;
Chris@82 1390 T3V = KP707106781 * (T3P - T3U);
Chris@82 1391 T57 = KP707106781 * (T3U + T3P);
Chris@82 1392 T42 = T3T - T3Q;
Chris@82 1393 T43 = T3L + T3O;
Chris@82 1394 T44 = KP707106781 * (T42 - T43);
Chris@82 1395 T5a = KP707106781 * (T42 + T43);
Chris@82 1396 }
Chris@82 1397 }
/* Twiddle inputs 3, 19, 27, 11; several combinations are pre-scaled by sqrt(1/2). */
Chris@82 1398 {
Chris@82 1399 E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k;
Chris@82 1400 {
Chris@82 1401 E T2D, T2F, T2C, T2E;
Chris@82 1402 T2D = ri[WS(rs, 3)];
Chris@82 1403 T2F = ii[WS(rs, 3)];
Chris@82 1404 T2C = W[4];
Chris@82 1405 T2E = W[5];
Chris@82 1406 T2G = FMA(T2C, T2D, T2E * T2F);
Chris@82 1407 T4c = FNMS(T2E, T2D, T2C * T2F);
Chris@82 1408 }
Chris@82 1409 {
Chris@82 1410 E T2I, T2K, T2H, T2J;
Chris@82 1411 T2I = ri[WS(rs, 19)];
Chris@82 1412 T2K = ii[WS(rs, 19)];
Chris@82 1413 T2H = W[36];
Chris@82 1414 T2J = W[37];
Chris@82 1415 T2L = FMA(T2H, T2I, T2J * T2K);
Chris@82 1416 T4d = FNMS(T2J, T2I, T2H * T2K);
Chris@82 1417 }
Chris@82 1418 T4e = T4c - T4d;
Chris@82 1419 T4f = T2G - T2L;
Chris@82 1420 {
Chris@82 1421 E T2O, T2Q, T2N, T2P;
Chris@82 1422 T2O = ri[WS(rs, 27)];
Chris@82 1423 T2Q = ii[WS(rs, 27)];
Chris@82 1424 T2N = W[52];
Chris@82 1425 T2P = W[53];
Chris@82 1426 T2R = FMA(T2N, T2O, T2P * T2Q);
Chris@82 1427 T4i = FNMS(T2P, T2O, T2N * T2Q);
Chris@82 1428 }
Chris@82 1429 {
Chris@82 1430 E T2T, T2V, T2S, T2U;
Chris@82 1431 T2T = ri[WS(rs, 11)];
Chris@82 1432 T2V = ii[WS(rs, 11)];
Chris@82 1433 T2S = W[20];
Chris@82 1434 T2U = W[21];
Chris@82 1435 T2W = FMA(T2S, T2T, T2U * T2V);
Chris@82 1436 T4j = FNMS(T2U, T2T, T2S * T2V);
Chris@82 1437 }
Chris@82 1438 T4h = T2R - T2W;
Chris@82 1439 T4k = T4i - T4j;
Chris@82 1440 {
Chris@82 1441 E T2M, T2X, T68, T69;
Chris@82 1442 T2M = T2G + T2L;
Chris@82 1443 T2X = T2R + T2W;
Chris@82 1444 T2Y = T2M + T2X;
Chris@82 1445 T6f = T2X - T2M;
Chris@82 1446 T68 = T4c + T4d;
Chris@82 1447 T69 = T4i + T4j;
Chris@82 1448 T6a = T68 - T69;
Chris@82 1449 T6P = T68 + T69;
Chris@82 1450 }
Chris@82 1451 {
Chris@82 1452 E T4g, T4l, T4t, T4u;
Chris@82 1453 T4g = T4e - T4f;
Chris@82 1454 T4l = T4h + T4k;
Chris@82 1455 T4m = KP707106781 * (T4g - T4l);
Chris@82 1456 T5h = KP707106781 * (T4g + T4l);
Chris@82 1457 T4t = T4h - T4k;
Chris@82 1458 T4u = T4f + T4e;
Chris@82 1459 T4v = KP707106781 * (T4t - T4u);
Chris@82 1460 T5e = KP707106781 * (T4u + T4t);
Chris@82 1461 }
Chris@82 1462 }
/* Store outputs 0, 8, 16, 24 (pure sums/differences, no constant multiplies). */
Chris@82 1463 {
Chris@82 1464 E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
Chris@82 1465 {
Chris@82 1466 E TH, T1s, T72, T79;
Chris@82 1467 TH = Tj + TG;
Chris@82 1468 T1s = T14 + T1r;
Chris@82 1469 T1t = TH + T1s;
Chris@82 1470 T6X = TH - T1s;
Chris@82 1471 T72 = T6E + T6F;
Chris@82 1472 T79 = T73 + T78;
Chris@82 1473 T7a = T72 + T79;
Chris@82 1474 T7c = T79 - T72;
Chris@82 1475 }
Chris@82 1476 {
Chris@82 1477 E T2e, T2Z, T6Y, T6Z;
Chris@82 1478 T2e = T1Q + T2d;
Chris@82 1479 T2Z = T2B + T2Y;
Chris@82 1480 T30 = T2e + T2Z;
Chris@82 1481 T7b = T2Z - T2e;
Chris@82 1482 T6Y = T6J + T6K;
Chris@82 1483 T6Z = T6O + T6P;
Chris@82 1484 T70 = T6Y - T6Z;
Chris@82 1485 T71 = T6Y + T6Z;
Chris@82 1486 }
Chris@82 1487 ri[WS(rs, 16)] = T1t - T30;
Chris@82 1488 ii[WS(rs, 16)] = T7a - T71;
Chris@82 1489 ri[0] = T1t + T30;
Chris@82 1490 ii[0] = T71 + T7a;
Chris@82 1491 ri[WS(rs, 24)] = T6X - T70;
Chris@82 1492 ii[WS(rs, 24)] = T7c - T7b;
Chris@82 1493 ri[WS(rs, 8)] = T6X + T70;
Chris@82 1494 ii[WS(rs, 8)] = T7b + T7c;
Chris@82 1495 }
/* Store outputs 4, 12, 20, 28 (uses only the sqrt(1/2) constant). */
Chris@82 1496 {
Chris@82 1497 E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
Chris@82 1498 {
Chris@82 1499 E T6D, T6G, T7e, T7f;
Chris@82 1500 T6D = Tj - TG;
Chris@82 1501 T6G = T6E - T6F;
Chris@82 1502 T6H = T6D + T6G;
Chris@82 1503 T6T = T6D - T6G;
Chris@82 1504 T7e = T1r - T14;
Chris@82 1505 T7f = T78 - T73;
Chris@82 1506 T7g = T7e + T7f;
Chris@82 1507 T7i = T7f - T7e;
Chris@82 1508 }
Chris@82 1509 {
Chris@82 1510 E T6I, T6L, T6N, T6Q;
Chris@82 1511 T6I = T1Q - T2d;
Chris@82 1512 T6L = T6J - T6K;
Chris@82 1513 T6M = T6I + T6L;
Chris@82 1514 T6U = T6L - T6I;
Chris@82 1515 T6N = T2B - T2Y;
Chris@82 1516 T6Q = T6O - T6P;
Chris@82 1517 T6R = T6N - T6Q;
Chris@82 1518 T6V = T6N + T6Q;
Chris@82 1519 }
Chris@82 1520 {
Chris@82 1521 E T6S, T7d, T6W, T7h;
Chris@82 1522 T6S = KP707106781 * (T6M + T6R);
Chris@82 1523 ri[WS(rs, 20)] = T6H - T6S;
Chris@82 1524 ri[WS(rs, 4)] = T6H + T6S;
Chris@82 1525 T7d = KP707106781 * (T6U + T6V);
Chris@82 1526 ii[WS(rs, 4)] = T7d + T7g;
Chris@82 1527 ii[WS(rs, 20)] = T7g - T7d;
Chris@82 1528 T6W = KP707106781 * (T6U - T6V);
Chris@82 1529 ri[WS(rs, 28)] = T6T - T6W;
Chris@82 1530 ri[WS(rs, 12)] = T6T + T6W;
Chris@82 1531 T7h = KP707106781 * (T6R - T6M);
Chris@82 1532 ii[WS(rs, 12)] = T7h + T7i;
Chris@82 1533 ii[WS(rs, 28)] = T7i - T7h;
Chris@82 1534 }
Chris@82 1535 }
/* Store outputs 2, 6, 10, 14, 18, 22, 26, 30 (pi/8 and pi/4 constants). */
Chris@82 1536 {
Chris@82 1537 E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
Chris@82 1538 E T6l;
Chris@82 1539 {
Chris@82 1540 E T5O, T5T, T60, T65;
Chris@82 1541 T5J = T5F - T5I;
Chris@82 1542 T7n = T7l + T7m;
Chris@82 1543 T7t = T7m - T7l;
Chris@82 1544 T6n = T5F + T5I;
Chris@82 1545 T5O = T5M - T5N;
Chris@82 1546 T5T = T5P + T5S;
Chris@82 1547 T5U = KP707106781 * (T5O - T5T);
Chris@82 1548 T7k = KP707106781 * (T5O + T5T);
Chris@82 1549 {
Chris@82 1550 E T6v, T6w, T6o, T6p;
Chris@82 1551 T6v = T67 + T6a;
Chris@82 1552 T6w = T6e + T6f;
Chris@82 1553 T6x = FNMS(KP382683432, T6w, KP923879532 * T6v);
Chris@82 1554 T6B = FMA(KP923879532, T6w, KP382683432 * T6v);
Chris@82 1555 T6o = T5N + T5M;
Chris@82 1556 T6p = T5P - T5S;
Chris@82 1557 T6q = KP707106781 * (T6o + T6p);
Chris@82 1558 T7s = KP707106781 * (T6p - T6o);
Chris@82 1559 }
Chris@82 1560 T60 = T5Y - T5Z;
Chris@82 1561 T65 = T61 - T64;
Chris@82 1562 T66 = FMA(KP923879532, T60, KP382683432 * T65);
Chris@82 1563 T6k = FNMS(KP923879532, T65, KP382683432 * T60);
Chris@82 1564 {
Chris@82 1565 E T6s, T6t, T6b, T6g;
Chris@82 1566 T6s = T5Y + T5Z;
Chris@82 1567 T6t = T61 + T64;
Chris@82 1568 T6u = FMA(KP382683432, T6s, KP923879532 * T6t);
Chris@82 1569 T6A = FNMS(KP382683432, T6t, KP923879532 * T6s);
Chris@82 1570 T6b = T67 - T6a;
Chris@82 1571 T6g = T6e - T6f;
Chris@82 1572 T6h = FNMS(KP923879532, T6g, KP382683432 * T6b);
Chris@82 1573 T6l = FMA(KP382683432, T6g, KP923879532 * T6b);
Chris@82 1574 }
Chris@82 1575 }
Chris@82 1576 {
Chris@82 1577 E T5V, T6i, T7r, T7u;
Chris@82 1578 T5V = T5J + T5U;
Chris@82 1579 T6i = T66 + T6h;
Chris@82 1580 ri[WS(rs, 22)] = T5V - T6i;
Chris@82 1581 ri[WS(rs, 6)] = T5V + T6i;
Chris@82 1582 T7r = T6k + T6l;
Chris@82 1583 T7u = T7s + T7t;
Chris@82 1584 ii[WS(rs, 6)] = T7r + T7u;
Chris@82 1585 ii[WS(rs, 22)] = T7u - T7r;
Chris@82 1586 }
Chris@82 1587 {
Chris@82 1588 E T6j, T6m, T7v, T7w;
Chris@82 1589 T6j = T5J - T5U;
Chris@82 1590 T6m = T6k - T6l;
Chris@82 1591 ri[WS(rs, 30)] = T6j - T6m;
Chris@82 1592 ri[WS(rs, 14)] = T6j + T6m;
Chris@82 1593 T7v = T6h - T66;
Chris@82 1594 T7w = T7t - T7s;
Chris@82 1595 ii[WS(rs, 14)] = T7v + T7w;
Chris@82 1596 ii[WS(rs, 30)] = T7w - T7v;
Chris@82 1597 }
Chris@82 1598 {
Chris@82 1599 E T6r, T6y, T7j, T7o;
Chris@82 1600 T6r = T6n + T6q;
Chris@82 1601 T6y = T6u + T6x;
Chris@82 1602 ri[WS(rs, 18)] = T6r - T6y;
Chris@82 1603 ri[WS(rs, 2)] = T6r + T6y;
Chris@82 1604 T7j = T6A + T6B;
Chris@82 1605 T7o = T7k + T7n;
Chris@82 1606 ii[WS(rs, 2)] = T7j + T7o;
Chris@82 1607 ii[WS(rs, 18)] = T7o - T7j;
Chris@82 1608 }
Chris@82 1609 {
Chris@82 1610 E T6z, T6C, T7p, T7q;
Chris@82 1611 T6z = T6n - T6q;
Chris@82 1612 T6C = T6A - T6B;
Chris@82 1613 ri[WS(rs, 26)] = T6z - T6C;
Chris@82 1614 ri[WS(rs, 10)] = T6z + T6C;
Chris@82 1615 T7p = T6x - T6u;
Chris@82 1616 T7q = T7n - T7k;
Chris@82 1617 ii[WS(rs, 10)] = T7p + T7q;
Chris@82 1618 ii[WS(rs, 26)] = T7q - T7p;
Chris@82 1619 }
Chris@82 1620 }
/* Store outputs 3, 7, 11, 15, 19, 23, 27, 31. */
Chris@82 1621 {
Chris@82 1622 E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
Chris@82 1623 E T4B, T3g, T7P;
Chris@82 1624 T3g = KP707106781 * (T3a - T3f);
Chris@82 1625 T3h = T35 - T3g;
Chris@82 1626 T4D = T35 + T3g;
Chris@82 1627 T7P = KP707106781 * (T4V - T4U);
Chris@82 1628 T7R = T7P + T7Q;
Chris@82 1629 T7X = T7Q - T7P;
Chris@82 1630 {
Chris@82 1631 E T3s, T3D, T4L, T4M;
Chris@82 1632 T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
Chris@82 1633 T3D = FMA(KP382683432, T3x, KP923879532 * T3C);
Chris@82 1634 T3E = T3s - T3D;
Chris@82 1635 T7O = T3s + T3D;
Chris@82 1636 T4L = T4b + T4m;
Chris@82 1637 T4M = T4s + T4v;
Chris@82 1638 T4N = FNMS(KP555570233, T4M, KP831469612 * T4L);
Chris@82 1639 T4R = FMA(KP831469612, T4M, KP555570233 * T4L);
Chris@82 1640 }
Chris@82 1641 {
Chris@82 1642 E T3W, T45, T4E, T4F;
Chris@82 1643 T3W = T3K - T3V;
Chris@82 1644 T45 = T41 - T44;
Chris@82 1645 T46 = FMA(KP980785280, T3W, KP195090322 * T45);
Chris@82 1646 T4A = FNMS(KP980785280, T45, KP195090322 * T3W);
Chris@82 1647 T4E = FMA(KP923879532, T3m, KP382683432 * T3r);
Chris@82 1648 T4F = FNMS(KP923879532, T3x, KP382683432 * T3C);
Chris@82 1649 T4G = T4E + T4F;
Chris@82 1650 T7W = T4F - T4E;
Chris@82 1651 }
Chris@82 1652 {
Chris@82 1653 E T4I, T4J, T4n, T4w;
Chris@82 1654 T4I = T3K + T3V;
Chris@82 1655 T4J = T41 + T44;
Chris@82 1656 T4K = FMA(KP555570233, T4I, KP831469612 * T4J);
Chris@82 1657 T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I);
Chris@82 1658 T4n = T4b - T4m;
Chris@82 1659 T4w = T4s - T4v;
Chris@82 1660 T4x = FNMS(KP980785280, T4w, KP195090322 * T4n);
Chris@82 1661 T4B = FMA(KP195090322, T4w, KP980785280 * T4n);
Chris@82 1662 }
Chris@82 1663 {
Chris@82 1664 E T3F, T4y, T7V, T7Y;
Chris@82 1665 T3F = T3h + T3E;
Chris@82 1666 T4y = T46 + T4x;
Chris@82 1667 ri[WS(rs, 23)] = T3F - T4y;
Chris@82 1668 ri[WS(rs, 7)] = T3F + T4y;
Chris@82 1669 T7V = T4A + T4B;
Chris@82 1670 T7Y = T7W + T7X;
Chris@82 1671 ii[WS(rs, 7)] = T7V + T7Y;
Chris@82 1672 ii[WS(rs, 23)] = T7Y - T7V;
Chris@82 1673 }
Chris@82 1674 {
Chris@82 1675 E T4z, T4C, T7Z, T80;
Chris@82 1676 T4z = T3h - T3E;
Chris@82 1677 T4C = T4A - T4B;
Chris@82 1678 ri[WS(rs, 31)] = T4z - T4C;
Chris@82 1679 ri[WS(rs, 15)] = T4z + T4C;
Chris@82 1680 T7Z = T4x - T46;
Chris@82 1681 T80 = T7X - T7W;
Chris@82 1682 ii[WS(rs, 15)] = T7Z + T80;
Chris@82 1683 ii[WS(rs, 31)] = T80 - T7Z;
Chris@82 1684 }
Chris@82 1685 {
Chris@82 1686 E T4H, T4O, T7N, T7S;
Chris@82 1687 T4H = T4D + T4G;
Chris@82 1688 T4O = T4K + T4N;
Chris@82 1689 ri[WS(rs, 19)] = T4H - T4O;
Chris@82 1690 ri[WS(rs, 3)] = T4H + T4O;
Chris@82 1691 T7N = T4Q + T4R;
Chris@82 1692 T7S = T7O + T7R;
Chris@82 1693 ii[WS(rs, 3)] = T7N + T7S;
Chris@82 1694 ii[WS(rs, 19)] = T7S - T7N;
Chris@82 1695 }
Chris@82 1696 {
Chris@82 1697 E T4P, T4S, T7T, T7U;
Chris@82 1698 T4P = T4D - T4G;
Chris@82 1699 T4S = T4Q - T4R;
Chris@82 1700 ri[WS(rs, 27)] = T4P - T4S;
Chris@82 1701 ri[WS(rs, 11)] = T4P + T4S;
Chris@82 1702 T7T = T4N - T4K;
Chris@82 1703 T7U = T7R - T7O;
Chris@82 1704 ii[WS(rs, 11)] = T7T + T7U;
Chris@82 1705 ii[WS(rs, 27)] = T7U - T7T;
Chris@82 1706 }
Chris@82 1707 }
/* Store outputs 1, 5, 9, 13, 17, 21, 25, 29. */
Chris@82 1708 {
Chris@82 1709 E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
Chris@82 1710 E T5n, T4W, T7z;
Chris@82 1711 T4W = KP707106781 * (T4U + T4V);
Chris@82 1712 T4X = T4T - T4W;
Chris@82 1713 T5p = T4T + T4W;
Chris@82 1714 T7z = KP707106781 * (T3a + T3f);
Chris@82 1715 T7D = T7z + T7C;
Chris@82 1716 T7J = T7C - T7z;
Chris@82 1717 {
Chris@82 1718 E T50, T53, T5x, T5y;
Chris@82 1719 T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y);
Chris@82 1720 T53 = FMA(KP923879532, T51, KP382683432 * T52);
Chris@82 1721 T54 = T50 - T53;
Chris@82 1722 T7y = T50 + T53;
Chris@82 1723 T5x = T5d + T5e;
Chris@82 1724 T5y = T5g + T5h;
Chris@82 1725 T5z = FNMS(KP195090322, T5y, KP980785280 * T5x);
Chris@82 1726 T5D = FMA(KP195090322, T5x, KP980785280 * T5y);
Chris@82 1727 }
Chris@82 1728 {
Chris@82 1729 E T58, T5b, T5q, T5r;
Chris@82 1730 T58 = T56 - T57;
Chris@82 1731 T5b = T59 - T5a;
Chris@82 1732 T5c = FMA(KP555570233, T58, KP831469612 * T5b);
Chris@82 1733 T5m = FNMS(KP831469612, T58, KP555570233 * T5b);
Chris@82 1734 T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z);
Chris@82 1735 T5r = FNMS(KP382683432, T51, KP923879532 * T52);
Chris@82 1736 T5s = T5q + T5r;
Chris@82 1737 T7I = T5r - T5q;
Chris@82 1738 }
Chris@82 1739 {
Chris@82 1740 E T5u, T5v, T5f, T5i;
Chris@82 1741 T5u = T56 + T57;
Chris@82 1742 T5v = T59 + T5a;
Chris@82 1743 T5w = FMA(KP980785280, T5u, KP195090322 * T5v);
Chris@82 1744 T5C = FNMS(KP195090322, T5u, KP980785280 * T5v);
Chris@82 1745 T5f = T5d - T5e;
Chris@82 1746 T5i = T5g - T5h;
Chris@82 1747 T5j = FNMS(KP831469612, T5i, KP555570233 * T5f);
Chris@82 1748 T5n = FMA(KP831469612, T5f, KP555570233 * T5i);
Chris@82 1749 }
Chris@82 1750 {
Chris@82 1751 E T55, T5k, T7H, T7K;
Chris@82 1752 T55 = T4X + T54;
Chris@82 1753 T5k = T5c + T5j;
Chris@82 1754 ri[WS(rs, 21)] = T55 - T5k;
Chris@82 1755 ri[WS(rs, 5)] = T55 + T5k;
Chris@82 1756 T7H = T5m + T5n;
Chris@82 1757 T7K = T7I + T7J;
Chris@82 1758 ii[WS(rs, 5)] = T7H + T7K;
Chris@82 1759 ii[WS(rs, 21)] = T7K - T7H;
Chris@82 1760 }
Chris@82 1761 {
Chris@82 1762 E T5l, T5o, T7L, T7M;
Chris@82 1763 T5l = T4X - T54;
Chris@82 1764 T5o = T5m - T5n;
Chris@82 1765 ri[WS(rs, 29)] = T5l - T5o;
Chris@82 1766 ri[WS(rs, 13)] = T5l + T5o;
Chris@82 1767 T7L = T5j - T5c;
Chris@82 1768 T7M = T7J - T7I;
Chris@82 1769 ii[WS(rs, 13)] = T7L + T7M;
Chris@82 1770 ii[WS(rs, 29)] = T7M - T7L;
Chris@82 1771 }
Chris@82 1772 {
Chris@82 1773 E T5t, T5A, T7x, T7E;
Chris@82 1774 T5t = T5p + T5s;
Chris@82 1775 T5A = T5w + T5z;
Chris@82 1776 ri[WS(rs, 17)] = T5t - T5A;
Chris@82 1777 ri[WS(rs, 1)] = T5t + T5A;
Chris@82 1778 T7x = T5C + T5D;
Chris@82 1779 T7E = T7y + T7D;
Chris@82 1780 ii[WS(rs, 1)] = T7x + T7E;
Chris@82 1781 ii[WS(rs, 17)] = T7E - T7x;
Chris@82 1782 }
Chris@82 1783 {
Chris@82 1784 E T5B, T5E, T7F, T7G;
Chris@82 1785 T5B = T5p - T5s;
Chris@82 1786 T5E = T5C - T5D;
Chris@82 1787 ri[WS(rs, 25)] = T5B - T5E;
Chris@82 1788 ri[WS(rs, 9)] = T5B + T5E;
Chris@82 1789 T7F = T5z - T5w;
Chris@82 1790 T7G = T7D - T7y;
Chris@82 1791 ii[WS(rs, 9)] = T7F + T7G;
Chris@82 1792 ii[WS(rs, 25)] = T7G - T7F;
Chris@82 1793 }
Chris@82 1794 }
Chris@82 1795 }
Chris@82 1796 }
Chris@82 1797 }
Chris@82 1798
/*
 * Twiddle-instruction table for the non-FMA variant; it is referenced by the
 * `desc` descriptor that follows. The t1_32 kernel in this branch reads
 * W[0]..W[61] per iteration, i.e. one (re, im) pair for each of inputs 1..31.
 * NOTE(review): the tw_instr field layout and the TW_FULL / TW_NEXT opcodes
 * are declared outside this file -- presumably "full set of twiddles for
 * 32 points" followed by an end/advance marker; confirm against the
 * codelet headers.
 */
Chris@82 1799 static const tw_instr twinstr[] = {
Chris@82 1800 {TW_FULL, 0, 32},
Chris@82 1801 {TW_NEXT, 1, 0}
Chris@82 1802 };
Chris@82 1803
/*
 * Codelet descriptor for the non-FMA variant: size 32, name "t1_32", the
 * twiddle table above and the GENUS object supplied by the included header.
 * The first three numbers of the braced group (340, 114, 94) match the
 * "additions, multiplications, fused multiply/add" counts quoted in the
 * generator comment of this #else branch.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is defined by ct_desc elsewhere; not verifiable from this file.
 */
Chris@82 1804 static const ct_desc desc = { 32, "t1_32", twinstr, &GENUS, {340, 114, 94, 0}, 0, 0, 0 };
Chris@82 1805
/*
 * Registration entry point (non-FMA variant): hands the t1_32 kernel and its
 * descriptor to planner `p` through kdft_dit_register.
 * X(...) is a project macro wrapping the identifier -- presumably a
 * precision-dependent name prefix; confirm in the project headers.
 */
Chris@82 1806 void X(codelet_t1_32) (planner *p) {
Chris@82 1807 X(kdft_dit_register) (p, t1_32, &desc);
Chris@82 1808 }
Chris@82 1809 #endif