annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:33 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@82 33 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb_32 (FMA variant): radix-32 twiddle codelet emitted by genfft's
 * gen_hc2hc with "-sign 1 -n 32 -dif" (see the "Generated by" line above).
 *
 * cr, ci : data arrays, updated in place.  Each iteration reads
 *          cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..31 and afterwards
 *          overwrites the same 64 locations.
 * W      : twiddle table; W[0]..W[61] are consumed per iteration.  The
 *          pointer is first offset by (mb - 1) * 62 and then advanced by
 *          62 after every iteration.
 * rs     : stride handed to WS() to locate element k.
 * mb, me : the loop runs for m = mb, ..., me - 1.
 * ms     : per-iteration step; cr advances by +ms while ci moves by -ms.
 *
 * Output k = 0 is stored without a twiddle; every output k >= 1 is
 * combined with the pair W[2k - 2], W[2k - 1].
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from headers not shown here.  The comments
 * below assume FMA(a, b, c) is a * b + c and FNMS(a, b, c) is c - a * b
 * -- confirm against the project headers.
 */
Chris@82 37 static void hb_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/*
 * Numerically these literals match: KP980785280 ~ cos(pi/16),
 * KP198912367 ~ tan(pi/16), KP831469612 ~ cos(3*pi/16),
 * KP668178637 ~ tan(3*pi/16), KP923879532 ~ cos(pi/8),
 * KP414213562 ~ tan(pi/8), KP707106781 ~ sqrt(1/2).
 */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/* One 32-point butterfly per m; cr, ci and W are stepped in the loop header. */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
/* Intermediates that survive from the input stage to the output stage. */
Chris@82 49 E Tf, T5K, T7k, T8k, T7N, T8x, T1i, T3i, T2L, T3v, T4v, T5f, T6m, T6T, T42;
Chris@82 50 E T52, TZ, T6X, T1X, T3p, T8p, T8B, T26, T3o, T4n, T58, T7z, T7T, T4k, T59;
Chris@82 51 E T6a, T6p, TK, T6W, T2o, T3m, T8s, T8A, T2x, T3l, T4g, T55, T7G, T7S, T4d;
Chris@82 52 E T56, T61, T6o, Tu, T6f, T7r, T8y, T7Q, T8l, T1F, T3w, T2O, T3j, T4y, T53;
Chris@82 53 E T5R, T6U, T49, T5g;
/* Input group 1: cr[0, 4, 8, 12, 16, 20, 24, 28] and ci[3, 7, 11, 15, 19, 23, 27, 31]. */
Chris@82 54 {
Chris@82 55 E T3, T12, T6, T2D, T2G, T6g, T15, T6h, Td, T6k, T1g, T2J, Ta, T6j, T1b;
Chris@82 56 E T2I;
Chris@82 57 {
Chris@82 58 E T1, T2, T13, T14;
Chris@82 59 T1 = cr[0];
Chris@82 60 T2 = ci[WS(rs, 15)];
Chris@82 61 T3 = T1 + T2;
Chris@82 62 T12 = T1 - T2;
Chris@82 63 {
Chris@82 64 E T4, T5, T2E, T2F;
Chris@82 65 T4 = cr[WS(rs, 8)];
Chris@82 66 T5 = ci[WS(rs, 7)];
Chris@82 67 T6 = T4 + T5;
Chris@82 68 T2D = T4 - T5;
Chris@82 69 T2E = ci[WS(rs, 31)];
Chris@82 70 T2F = cr[WS(rs, 16)];
Chris@82 71 T2G = T2E + T2F;
Chris@82 72 T6g = T2E - T2F;
Chris@82 73 }
Chris@82 74 T13 = ci[WS(rs, 23)];
Chris@82 75 T14 = cr[WS(rs, 24)];
Chris@82 76 T15 = T13 + T14;
Chris@82 77 T6h = T13 - T14;
Chris@82 78 {
Chris@82 79 E Tb, Tc, T1c, T1d, T1e, T1f;
Chris@82 80 Tb = ci[WS(rs, 3)];
Chris@82 81 Tc = cr[WS(rs, 12)];
Chris@82 82 T1c = Tb - Tc;
Chris@82 83 T1d = ci[WS(rs, 19)];
Chris@82 84 T1e = cr[WS(rs, 28)];
Chris@82 85 T1f = T1d + T1e;
Chris@82 86 Td = Tb + Tc;
Chris@82 87 T6k = T1d - T1e;
Chris@82 88 T1g = T1c - T1f;
Chris@82 89 T2J = T1c + T1f;
Chris@82 90 }
Chris@82 91 {
Chris@82 92 E T8, T9, T17, T18, T19, T1a;
Chris@82 93 T8 = cr[WS(rs, 4)];
Chris@82 94 T9 = ci[WS(rs, 11)];
Chris@82 95 T17 = T8 - T9;
Chris@82 96 T18 = ci[WS(rs, 27)];
Chris@82 97 T19 = cr[WS(rs, 20)];
Chris@82 98 T1a = T18 + T19;
Chris@82 99 Ta = T8 + T9;
Chris@82 100 T6j = T18 - T19;
Chris@82 101 T1b = T17 - T1a;
Chris@82 102 T2I = T17 + T1a;
Chris@82 103 }
Chris@82 104 }
Chris@82 105 {
Chris@82 106 E T7, Te, T7i, T7j;
Chris@82 107 T7 = T3 + T6;
Chris@82 108 Te = Ta + Td;
Chris@82 109 Tf = T7 + Te;
Chris@82 110 T5K = T7 - Te;
Chris@82 111 T7i = T3 - T6;
Chris@82 112 T7j = T6k - T6j;
Chris@82 113 T7k = T7i - T7j;
Chris@82 114 T8k = T7i + T7j;
Chris@82 115 }
Chris@82 116 {
Chris@82 117 E T7L, T7M, T16, T1h;
Chris@82 118 T7L = T6g - T6h;
Chris@82 119 T7M = Ta - Td;
Chris@82 120 T7N = T7L - T7M;
Chris@82 121 T8x = T7M + T7L;
Chris@82 122 T16 = T12 - T15;
Chris@82 123 T1h = T1b + T1g;
Chris@82 124 T1i = FNMS(KP707106781, T1h, T16);
Chris@82 125 T3i = FMA(KP707106781, T1h, T16);
Chris@82 126 }
Chris@82 127 {
Chris@82 128 E T2H, T2K, T4t, T4u;
Chris@82 129 T2H = T2D + T2G;
Chris@82 130 T2K = T2I - T2J;
Chris@82 131 T2L = FNMS(KP707106781, T2K, T2H);
Chris@82 132 T3v = FMA(KP707106781, T2K, T2H);
Chris@82 133 T4t = T2G - T2D;
Chris@82 134 T4u = T1b - T1g;
Chris@82 135 T4v = FMA(KP707106781, T4u, T4t);
Chris@82 136 T5f = FNMS(KP707106781, T4u, T4t);
Chris@82 137 }
Chris@82 138 {
Chris@82 139 E T6i, T6l, T40, T41;
Chris@82 140 T6i = T6g + T6h;
Chris@82 141 T6l = T6j + T6k;
Chris@82 142 T6m = T6i - T6l;
Chris@82 143 T6T = T6i + T6l;
Chris@82 144 T40 = T12 + T15;
Chris@82 145 T41 = T2I + T2J;
Chris@82 146 T42 = FNMS(KP707106781, T41, T40);
Chris@82 147 T52 = FMA(KP707106781, T41, T40);
Chris@82 148 }
Chris@82 149 }
/* Input group 2: cr[3, 7, 11, 15, 19, 23, 27, 31] and ci[0, 4, 8, 12, 16, 20, 24, 28]. */
Chris@82 150 {
Chris@82 151 E TR, T7w, T1H, T1Y, T1K, T7t, T21, T65, TY, T7u, T7x, T1Q, T1V, T24, T68;
Chris@82 152 E T23, T7v, T7y;
Chris@82 153 {
Chris@82 154 E TL, TM, TN, TO, TP, TQ;
Chris@82 155 TL = ci[0];
Chris@82 156 TM = cr[WS(rs, 15)];
Chris@82 157 TN = TL + TM;
Chris@82 158 TO = cr[WS(rs, 7)];
Chris@82 159 TP = ci[WS(rs, 8)];
Chris@82 160 TQ = TO + TP;
Chris@82 161 TR = TN + TQ;
Chris@82 162 T7w = TN - TQ;
Chris@82 163 T1H = TO - TP;
Chris@82 164 T1Y = TL - TM;
Chris@82 165 }
Chris@82 166 {
Chris@82 167 E T1I, T1J, T63, T1Z, T20, T64;
Chris@82 168 T1I = ci[WS(rs, 16)];
Chris@82 169 T1J = cr[WS(rs, 31)];
Chris@82 170 T63 = T1I - T1J;
Chris@82 171 T1Z = ci[WS(rs, 24)];
Chris@82 172 T20 = cr[WS(rs, 23)];
Chris@82 173 T64 = T1Z - T20;
Chris@82 174 T1K = T1I + T1J;
Chris@82 175 T7t = T63 - T64;
Chris@82 176 T21 = T1Z + T20;
Chris@82 177 T65 = T63 + T64;
Chris@82 178 }
Chris@82 179 {
Chris@82 180 E TU, T1M, T1U, T67, TX, T1R, T1P, T66;
Chris@82 181 {
Chris@82 182 E TS, TT, T1S, T1T;
Chris@82 183 TS = cr[WS(rs, 3)];
Chris@82 184 TT = ci[WS(rs, 12)];
Chris@82 185 TU = TS + TT;
Chris@82 186 T1M = TS - TT;
Chris@82 187 T1S = ci[WS(rs, 20)];
Chris@82 188 T1T = cr[WS(rs, 27)];
Chris@82 189 T1U = T1S + T1T;
Chris@82 190 T67 = T1S - T1T;
Chris@82 191 }
Chris@82 192 {
Chris@82 193 E TV, TW, T1N, T1O;
Chris@82 194 TV = ci[WS(rs, 4)];
Chris@82 195 TW = cr[WS(rs, 11)];
Chris@82 196 TX = TV + TW;
Chris@82 197 T1R = TV - TW;
Chris@82 198 T1N = ci[WS(rs, 28)];
Chris@82 199 T1O = cr[WS(rs, 19)];
Chris@82 200 T1P = T1N + T1O;
Chris@82 201 T66 = T1N - T1O;
Chris@82 202 }
Chris@82 203 TY = TU + TX;
Chris@82 204 T7u = TU - TX;
Chris@82 205 T7x = T67 - T66;
Chris@82 206 T1Q = T1M + T1P;
Chris@82 207 T1V = T1R + T1U;
Chris@82 208 T24 = T1R - T1U;
Chris@82 209 T68 = T66 + T67;
Chris@82 210 T23 = T1M - T1P;
Chris@82 211 }
Chris@82 212 TZ = TR + TY;
Chris@82 213 T6X = T65 + T68;
Chris@82 214 {
Chris@82 215 E T1L, T1W, T8n, T8o;
Chris@82 216 T1L = T1H - T1K;
Chris@82 217 T1W = T1Q - T1V;
Chris@82 218 T1X = FNMS(KP707106781, T1W, T1L);
Chris@82 219 T3p = FMA(KP707106781, T1W, T1L);
Chris@82 220 T8n = T7u + T7t;
Chris@82 221 T8o = T7w + T7x;
Chris@82 222 T8p = FNMS(KP414213562, T8o, T8n);
Chris@82 223 T8B = FMA(KP414213562, T8n, T8o);
Chris@82 224 }
Chris@82 225 {
Chris@82 226 E T22, T25, T4l, T4m;
Chris@82 227 T22 = T1Y - T21;
Chris@82 228 T25 = T23 + T24;
Chris@82 229 T26 = FNMS(KP707106781, T25, T22);
Chris@82 230 T3o = FMA(KP707106781, T25, T22);
Chris@82 231 T4l = T1H + T1K;
Chris@82 232 T4m = T23 - T24;
Chris@82 233 T4n = FNMS(KP707106781, T4m, T4l);
Chris@82 234 T58 = FMA(KP707106781, T4m, T4l);
Chris@82 235 }
Chris@82 236 T7v = T7t - T7u;
Chris@82 237 T7y = T7w - T7x;
Chris@82 238 T7z = FMA(KP414213562, T7y, T7v);
Chris@82 239 T7T = FNMS(KP414213562, T7v, T7y);
Chris@82 240 {
Chris@82 241 E T4i, T4j, T62, T69;
Chris@82 242 T4i = T1Y + T21;
Chris@82 243 T4j = T1Q + T1V;
Chris@82 244 T4k = FNMS(KP707106781, T4j, T4i);
Chris@82 245 T59 = FMA(KP707106781, T4j, T4i);
Chris@82 246 T62 = TR - TY;
Chris@82 247 T69 = T65 - T68;
Chris@82 248 T6a = T62 + T69;
Chris@82 249 T6p = T69 - T62;
Chris@82 250 }
Chris@82 251 }
/* Input group 3: cr[1, 5, 9, 13, 17, 21, 25, 29] and ci[2, 6, 10, 14, 18, 22, 26, 30]. */
Chris@82 252 {
Chris@82 253 E TC, T7D, T28, T2p, T2b, T7A, T2s, T5W, TJ, T7B, T7E, T2h, T2m, T2v, T5Z;
Chris@82 254 E T2u, T7C, T7F;
Chris@82 255 {
Chris@82 256 E Tw, Tx, Ty, Tz, TA, TB;
Chris@82 257 Tw = cr[WS(rs, 1)];
Chris@82 258 Tx = ci[WS(rs, 14)];
Chris@82 259 Ty = Tw + Tx;
Chris@82 260 Tz = cr[WS(rs, 9)];
Chris@82 261 TA = ci[WS(rs, 6)];
Chris@82 262 TB = Tz + TA;
Chris@82 263 TC = Ty + TB;
Chris@82 264 T7D = Ty - TB;
Chris@82 265 T28 = Tz - TA;
Chris@82 266 T2p = Tw - Tx;
Chris@82 267 }
Chris@82 268 {
Chris@82 269 E T29, T2a, T5U, T2q, T2r, T5V;
Chris@82 270 T29 = ci[WS(rs, 30)];
Chris@82 271 T2a = cr[WS(rs, 17)];
Chris@82 272 T5U = T29 - T2a;
Chris@82 273 T2q = ci[WS(rs, 22)];
Chris@82 274 T2r = cr[WS(rs, 25)];
Chris@82 275 T5V = T2q - T2r;
Chris@82 276 T2b = T29 + T2a;
Chris@82 277 T7A = T5U - T5V;
Chris@82 278 T2s = T2q + T2r;
Chris@82 279 T5W = T5U + T5V;
Chris@82 280 }
Chris@82 281 {
Chris@82 282 E TF, T2d, T2l, T5Y, TI, T2i, T2g, T5X;
Chris@82 283 {
Chris@82 284 E TD, TE, T2j, T2k;
Chris@82 285 TD = cr[WS(rs, 5)];
Chris@82 286 TE = ci[WS(rs, 10)];
Chris@82 287 TF = TD + TE;
Chris@82 288 T2d = TD - TE;
Chris@82 289 T2j = ci[WS(rs, 18)];
Chris@82 290 T2k = cr[WS(rs, 29)];
Chris@82 291 T2l = T2j + T2k;
Chris@82 292 T5Y = T2j - T2k;
Chris@82 293 }
Chris@82 294 {
Chris@82 295 E TG, TH, T2e, T2f;
Chris@82 296 TG = ci[WS(rs, 2)];
Chris@82 297 TH = cr[WS(rs, 13)];
Chris@82 298 TI = TG + TH;
Chris@82 299 T2i = TG - TH;
Chris@82 300 T2e = ci[WS(rs, 26)];
Chris@82 301 T2f = cr[WS(rs, 21)];
Chris@82 302 T2g = T2e + T2f;
Chris@82 303 T5X = T2e - T2f;
Chris@82 304 }
Chris@82 305 TJ = TF + TI;
Chris@82 306 T7B = TF - TI;
Chris@82 307 T7E = T5Y - T5X;
Chris@82 308 T2h = T2d + T2g;
Chris@82 309 T2m = T2i + T2l;
Chris@82 310 T2v = T2i - T2l;
Chris@82 311 T5Z = T5X + T5Y;
Chris@82 312 T2u = T2d - T2g;
Chris@82 313 }
Chris@82 314 TK = TC + TJ;
Chris@82 315 T6W = T5W + T5Z;
Chris@82 316 {
Chris@82 317 E T2c, T2n, T8q, T8r;
Chris@82 318 T2c = T28 + T2b;
Chris@82 319 T2n = T2h - T2m;
Chris@82 320 T2o = FNMS(KP707106781, T2n, T2c);
Chris@82 321 T3m = FMA(KP707106781, T2n, T2c);
Chris@82 322 T8q = T7B + T7A;
Chris@82 323 T8r = T7D + T7E;
Chris@82 324 T8s = FMA(KP414213562, T8r, T8q);
Chris@82 325 T8A = FNMS(KP414213562, T8q, T8r);
Chris@82 326 }
Chris@82 327 {
Chris@82 328 E T2t, T2w, T4e, T4f;
Chris@82 329 T2t = T2p - T2s;
Chris@82 330 T2w = T2u + T2v;
Chris@82 331 T2x = FNMS(KP707106781, T2w, T2t);
Chris@82 332 T3l = FMA(KP707106781, T2w, T2t);
Chris@82 333 T4e = T2b - T28;
Chris@82 334 T4f = T2v - T2u;
Chris@82 335 T4g = FNMS(KP707106781, T4f, T4e);
Chris@82 336 T55 = FMA(KP707106781, T4f, T4e);
Chris@82 337 }
Chris@82 338 T7C = T7A - T7B;
Chris@82 339 T7F = T7D - T7E;
Chris@82 340 T7G = FNMS(KP414213562, T7F, T7C);
Chris@82 341 T7S = FMA(KP414213562, T7C, T7F);
Chris@82 342 {
Chris@82 343 E T4b, T4c, T5T, T60;
Chris@82 344 T4b = T2p + T2s;
Chris@82 345 T4c = T2h + T2m;
Chris@82 346 T4d = FNMS(KP707106781, T4c, T4b);
Chris@82 347 T56 = FMA(KP707106781, T4c, T4b);
Chris@82 348 T5T = TC - TJ;
Chris@82 349 T60 = T5W - T5Z;
Chris@82 350 T61 = T5T - T60;
Chris@82 351 T6o = T5T + T60;
Chris@82 352 }
Chris@82 353 }
/* Input group 4: cr[2, 6, 10, 14, 18, 22, 26, 30] and ci[1, 5, 9, 13, 17, 21, 25, 29]. */
Chris@82 354 {
Chris@82 355 E Ti, T5P, Tl, T5O, T1y, T1D, T7p, T7o, T44, T43, Tp, T5M, Ts, T5L, T1n;
Chris@82 356 E T1s, T7m, T7l, T47, T46;
Chris@82 357 {
Chris@82 358 E T1z, T1C, T1u, T1x;
Chris@82 359 {
Chris@82 360 E Tg, Th, T1A, T1B;
Chris@82 361 Tg = cr[WS(rs, 2)];
Chris@82 362 Th = ci[WS(rs, 13)];
Chris@82 363 Ti = Tg + Th;
Chris@82 364 T1z = Tg - Th;
Chris@82 365 T1A = ci[WS(rs, 21)];
Chris@82 366 T1B = cr[WS(rs, 26)];
Chris@82 367 T1C = T1A + T1B;
Chris@82 368 T5P = T1A - T1B;
Chris@82 369 }
Chris@82 370 {
Chris@82 371 E Tj, Tk, T1v, T1w;
Chris@82 372 Tj = cr[WS(rs, 10)];
Chris@82 373 Tk = ci[WS(rs, 5)];
Chris@82 374 Tl = Tj + Tk;
Chris@82 375 T1u = Tj - Tk;
Chris@82 376 T1v = ci[WS(rs, 29)];
Chris@82 377 T1w = cr[WS(rs, 18)];
Chris@82 378 T1x = T1v + T1w;
Chris@82 379 T5O = T1v - T1w;
Chris@82 380 }
Chris@82 381 T1y = T1u + T1x;
Chris@82 382 T1D = T1z - T1C;
Chris@82 383 T7p = T5O - T5P;
Chris@82 384 T7o = Ti - Tl;
Chris@82 385 T44 = T1z + T1C;
Chris@82 386 T43 = T1x - T1u;
Chris@82 387 }
Chris@82 388 {
Chris@82 389 E T1o, T1r, T1j, T1m;
Chris@82 390 {
Chris@82 391 E Tn, To, T1p, T1q;
Chris@82 392 Tn = ci[WS(rs, 1)];
Chris@82 393 To = cr[WS(rs, 14)];
Chris@82 394 Tp = Tn + To;
Chris@82 395 T1o = Tn - To;
Chris@82 396 T1p = ci[WS(rs, 25)];
Chris@82 397 T1q = cr[WS(rs, 22)];
Chris@82 398 T1r = T1p + T1q;
Chris@82 399 T5M = T1p - T1q;
Chris@82 400 }
Chris@82 401 {
Chris@82 402 E Tq, Tr, T1k, T1l;
Chris@82 403 Tq = cr[WS(rs, 6)];
Chris@82 404 Tr = ci[WS(rs, 9)];
Chris@82 405 Ts = Tq + Tr;
Chris@82 406 T1j = Tq - Tr;
Chris@82 407 T1k = ci[WS(rs, 17)];
Chris@82 408 T1l = cr[WS(rs, 30)];
Chris@82 409 T1m = T1k + T1l;
Chris@82 410 T5L = T1k - T1l;
Chris@82 411 }
Chris@82 412 T1n = T1j - T1m;
Chris@82 413 T1s = T1o - T1r;
Chris@82 414 T7m = Tp - Ts;
Chris@82 415 T7l = T5L - T5M;
Chris@82 416 T47 = T1o + T1r;
Chris@82 417 T46 = T1j + T1m;
Chris@82 418 }
Chris@82 419 {
Chris@82 420 E Tm, Tt, T7n, T7q;
Chris@82 421 Tm = Ti + Tl;
Chris@82 422 Tt = Tp + Ts;
Chris@82 423 Tu = Tm + Tt;
Chris@82 424 T6f = Tm - Tt;
Chris@82 425 T7n = T7l - T7m;
Chris@82 426 T7q = T7o + T7p;
Chris@82 427 T7r = T7n - T7q;
Chris@82 428 T8y = T7q + T7n;
Chris@82 429 }
Chris@82 430 {
Chris@82 431 E T7O, T7P, T1t, T1E;
Chris@82 432 T7O = T7o - T7p;
Chris@82 433 T7P = T7m + T7l;
Chris@82 434 T7Q = T7O - T7P;
Chris@82 435 T8l = T7O + T7P;
Chris@82 436 T1t = FNMS(KP414213562, T1s, T1n);
Chris@82 437 T1E = FMA(KP414213562, T1D, T1y);
Chris@82 438 T1F = T1t - T1E;
Chris@82 439 T3w = T1E + T1t;
Chris@82 440 }
Chris@82 441 {
Chris@82 442 E T2M, T2N, T4w, T4x;
Chris@82 443 T2M = FNMS(KP414213562, T1y, T1D);
Chris@82 444 T2N = FMA(KP414213562, T1n, T1s);
Chris@82 445 T2O = T2M - T2N;
Chris@82 446 T3j = T2M + T2N;
Chris@82 447 T4w = FMA(KP414213562, T43, T44);
Chris@82 448 T4x = FMA(KP414213562, T46, T47);
Chris@82 449 T4y = T4w - T4x;
Chris@82 450 T53 = T4w + T4x;
Chris@82 451 }
Chris@82 452 {
Chris@82 453 E T5N, T5Q, T45, T48;
Chris@82 454 T5N = T5L + T5M;
Chris@82 455 T5Q = T5O + T5P;
Chris@82 456 T5R = T5N - T5Q;
Chris@82 457 T6U = T5Q + T5N;
Chris@82 458 T45 = FNMS(KP414213562, T44, T43);
Chris@82 459 T48 = FNMS(KP414213562, T47, T46);
Chris@82 460 T49 = T45 + T48;
Chris@82 461 T5g = T48 - T45;
Chris@82 462 }
Chris@82 463 }
/*
 * Outputs 0 and 16.  Output 0 is written without a twiddle.  For the
 * twiddled outputs in this function each cr/ci pair has the shape
 * cr = Wa * x - Wb * y, ci = Wa * y + Wb * x with (Wa, Wb) = (W[2k-2], W[2k-1])
 * (under the FMA/FNMS assumption stated in the function header).
 */
Chris@82 464 {
Chris@82 465 E Tv, T10, T6Q, T6V, T6Y, T6Z;
Chris@82 466 Tv = Tf + Tu;
Chris@82 467 T10 = TK + TZ;
Chris@82 468 T6Q = Tv - T10;
Chris@82 469 T6V = T6T + T6U;
Chris@82 470 T6Y = T6W + T6X;
Chris@82 471 T6Z = T6V - T6Y;
Chris@82 472 cr[0] = Tv + T10;
Chris@82 473 ci[0] = T6V + T6Y;
Chris@82 474 {
Chris@82 475 E T6P, T6R, T6S, T70;
Chris@82 476 T6P = W[30];
Chris@82 477 T6R = T6P * T6Q;
Chris@82 478 T6S = W[31];
Chris@82 479 T70 = T6S * T6Q;
Chris@82 480 cr[WS(rs, 16)] = FNMS(T6S, T6Z, T6R);
Chris@82 481 ci[WS(rs, 16)] = FMA(T6P, T6Z, T70);
Chris@82 482 }
Chris@82 483 }
/* Outputs 18 (W[34], W[35]) and 2 (W[2], W[3]). */
Chris@82 484 {
Chris@82 485 E T8O, T8W, T8T, T8Z;
Chris@82 486 {
Chris@82 487 E T8M, T8N, T8R, T8S;
Chris@82 488 T8M = FMA(KP707106781, T8l, T8k);
Chris@82 489 T8N = T8A + T8B;
Chris@82 490 T8O = FNMS(KP923879532, T8N, T8M);
Chris@82 491 T8W = FMA(KP923879532, T8N, T8M);
Chris@82 492 T8R = FMA(KP707106781, T8y, T8x);
Chris@82 493 T8S = T8s + T8p;
Chris@82 494 T8T = FNMS(KP923879532, T8S, T8R);
Chris@82 495 T8Z = FMA(KP923879532, T8S, T8R);
Chris@82 496 }
Chris@82 497 {
Chris@82 498 E T8P, T8U, T8L, T8Q;
Chris@82 499 T8L = W[34];
Chris@82 500 T8P = T8L * T8O;
Chris@82 501 T8U = T8L * T8T;
Chris@82 502 T8Q = W[35];
Chris@82 503 cr[WS(rs, 18)] = FNMS(T8Q, T8T, T8P);
Chris@82 504 ci[WS(rs, 18)] = FMA(T8Q, T8O, T8U);
Chris@82 505 }
Chris@82 506 {
Chris@82 507 E T8X, T90, T8V, T8Y;
Chris@82 508 T8V = W[2];
Chris@82 509 T8X = T8V * T8W;
Chris@82 510 T90 = T8V * T8Z;
Chris@82 511 T8Y = W[3];
Chris@82 512 cr[WS(rs, 2)] = FNMS(T8Y, T8Z, T8X);
Chris@82 513 ci[WS(rs, 2)] = FMA(T8Y, T8W, T90);
Chris@82 514 }
Chris@82 515 }
/* Outputs 14 (W[26], W[27]) and 30 (W[58], W[59]). */
Chris@82 516 {
Chris@82 517 E T86, T8e, T8b, T8h;
Chris@82 518 {
Chris@82 519 E T84, T85, T89, T8a;
Chris@82 520 T84 = FNMS(KP707106781, T7r, T7k);
Chris@82 521 T85 = T7S + T7T;
Chris@82 522 T86 = FNMS(KP923879532, T85, T84);
Chris@82 523 T8e = FMA(KP923879532, T85, T84);
Chris@82 524 T89 = FNMS(KP707106781, T7Q, T7N);
Chris@82 525 T8a = T7G + T7z;
Chris@82 526 T8b = FNMS(KP923879532, T8a, T89);
Chris@82 527 T8h = FMA(KP923879532, T8a, T89);
Chris@82 528 }
Chris@82 529 {
Chris@82 530 E T87, T8c, T83, T88;
Chris@82 531 T83 = W[26];
Chris@82 532 T87 = T83 * T86;
Chris@82 533 T8c = T83 * T8b;
Chris@82 534 T88 = W[27];
Chris@82 535 cr[WS(rs, 14)] = FNMS(T88, T8b, T87);
Chris@82 536 ci[WS(rs, 14)] = FMA(T88, T86, T8c);
Chris@82 537 }
Chris@82 538 {
Chris@82 539 E T8f, T8i, T8d, T8g;
Chris@82 540 T8d = W[58];
Chris@82 541 T8f = T8d * T8e;
Chris@82 542 T8i = T8d * T8h;
Chris@82 543 T8g = W[59];
Chris@82 544 cr[WS(rs, 30)] = FNMS(T8g, T8h, T8f);
Chris@82 545 ci[WS(rs, 30)] = FMA(T8g, T8e, T8i);
Chris@82 546 }
Chris@82 547 }
/* Outputs 28 (W[54], W[55]) and 12 (W[22], W[23]). */
Chris@82 548 {
Chris@82 549 E T6C, T6K, T6H, T6N;
Chris@82 550 {
Chris@82 551 E T6A, T6B, T6F, T6G;
Chris@82 552 T6A = T5K - T5R;
Chris@82 553 T6B = T6p - T6o;
Chris@82 554 T6C = FNMS(KP707106781, T6B, T6A);
Chris@82 555 T6K = FMA(KP707106781, T6B, T6A);
Chris@82 556 T6F = T6m - T6f;
Chris@82 557 T6G = T61 - T6a;
Chris@82 558 T6H = FNMS(KP707106781, T6G, T6F);
Chris@82 559 T6N = FMA(KP707106781, T6G, T6F);
Chris@82 560 }
Chris@82 561 {
Chris@82 562 E T6D, T6I, T6z, T6E;
Chris@82 563 T6z = W[54];
Chris@82 564 T6D = T6z * T6C;
Chris@82 565 T6I = T6z * T6H;
Chris@82 566 T6E = W[55];
Chris@82 567 cr[WS(rs, 28)] = FNMS(T6E, T6H, T6D);
Chris@82 568 ci[WS(rs, 28)] = FMA(T6E, T6C, T6I);
Chris@82 569 }
Chris@82 570 {
Chris@82 571 E T6L, T6O, T6J, T6M;
Chris@82 572 T6J = W[22];
Chris@82 573 T6L = T6J * T6K;
Chris@82 574 T6O = T6J * T6N;
Chris@82 575 T6M = W[23];
Chris@82 576 cr[WS(rs, 12)] = FNMS(T6M, T6N, T6L);
Chris@82 577 ci[WS(rs, 12)] = FMA(T6M, T6K, T6O);
Chris@82 578 }
Chris@82 579 }
/* Outputs 26 (W[50], W[51]) and 10 (W[18], W[19]). */
Chris@82 580 {
Chris@82 581 E T8u, T8G, T8D, T8J;
Chris@82 582 {
Chris@82 583 E T8m, T8t, T8z, T8C;
Chris@82 584 T8m = FNMS(KP707106781, T8l, T8k);
Chris@82 585 T8t = T8p - T8s;
Chris@82 586 T8u = FNMS(KP923879532, T8t, T8m);
Chris@82 587 T8G = FMA(KP923879532, T8t, T8m);
Chris@82 588 T8z = FNMS(KP707106781, T8y, T8x);
Chris@82 589 T8C = T8A - T8B;
Chris@82 590 T8D = FNMS(KP923879532, T8C, T8z);
Chris@82 591 T8J = FMA(KP923879532, T8C, T8z);
Chris@82 592 }
Chris@82 593 {
Chris@82 594 E T8j, T8v, T8w, T8E;
Chris@82 595 T8j = W[50];
Chris@82 596 T8v = T8j * T8u;
Chris@82 597 T8w = W[51];
Chris@82 598 T8E = T8w * T8u;
Chris@82 599 cr[WS(rs, 26)] = FNMS(T8w, T8D, T8v);
Chris@82 600 ci[WS(rs, 26)] = FMA(T8j, T8D, T8E);
Chris@82 601 }
Chris@82 602 {
Chris@82 603 E T8F, T8H, T8I, T8K;
Chris@82 604 T8F = W[18];
Chris@82 605 T8H = T8F * T8G;
Chris@82 606 T8I = W[19];
Chris@82 607 T8K = T8I * T8G;
Chris@82 608 cr[WS(rs, 10)] = FNMS(T8I, T8J, T8H);
Chris@82 609 ci[WS(rs, 10)] = FMA(T8F, T8J, T8K);
Chris@82 610 }
Chris@82 611 }
/* Outputs 20 (W[38], W[39]) and 4 (W[6], W[7]). */
Chris@82 612 {
Chris@82 613 E T6c, T6u, T6r, T6x;
Chris@82 614 {
Chris@82 615 E T5S, T6b, T6n, T6q;
Chris@82 616 T5S = T5K + T5R;
Chris@82 617 T6b = T61 + T6a;
Chris@82 618 T6c = FNMS(KP707106781, T6b, T5S);
Chris@82 619 T6u = FMA(KP707106781, T6b, T5S);
Chris@82 620 T6n = T6f + T6m;
Chris@82 621 T6q = T6o + T6p;
Chris@82 622 T6r = FNMS(KP707106781, T6q, T6n);
Chris@82 623 T6x = FMA(KP707106781, T6q, T6n);
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T5J, T6d, T6e, T6s;
Chris@82 627 T5J = W[38];
Chris@82 628 T6d = T5J * T6c;
Chris@82 629 T6e = W[39];
Chris@82 630 T6s = T6e * T6c;
Chris@82 631 cr[WS(rs, 20)] = FNMS(T6e, T6r, T6d);
Chris@82 632 ci[WS(rs, 20)] = FMA(T5J, T6r, T6s);
Chris@82 633 }
Chris@82 634 {
Chris@82 635 E T6t, T6v, T6w, T6y;
Chris@82 636 T6t = W[6];
Chris@82 637 T6v = T6t * T6u;
Chris@82 638 T6w = W[7];
Chris@82 639 T6y = T6w * T6u;
Chris@82 640 cr[WS(rs, 4)] = FNMS(T6w, T6x, T6v);
Chris@82 641 ci[WS(rs, 4)] = FMA(T6t, T6x, T6y);
Chris@82 642 }
Chris@82 643 }
/* Outputs 24 (W[46], W[47]) and 8 (W[14], W[15]). */
Chris@82 644 {
Chris@82 645 E T74, T7c, T79, T7f;
Chris@82 646 {
Chris@82 647 E T72, T73, T77, T78;
Chris@82 648 T72 = Tf - Tu;
Chris@82 649 T73 = T6X - T6W;
Chris@82 650 T74 = T72 - T73;
Chris@82 651 T7c = T72 + T73;
Chris@82 652 T77 = T6T - T6U;
Chris@82 653 T78 = TK - TZ;
Chris@82 654 T79 = T77 - T78;
Chris@82 655 T7f = T78 + T77;
Chris@82 656 }
Chris@82 657 {
Chris@82 658 E T75, T7a, T71, T76;
Chris@82 659 T71 = W[46];
Chris@82 660 T75 = T71 * T74;
Chris@82 661 T7a = T71 * T79;
Chris@82 662 T76 = W[47];
Chris@82 663 cr[WS(rs, 24)] = FNMS(T76, T79, T75);
Chris@82 664 ci[WS(rs, 24)] = FMA(T76, T74, T7a);
Chris@82 665 }
Chris@82 666 {
Chris@82 667 E T7d, T7g, T7b, T7e;
Chris@82 668 T7b = W[14];
Chris@82 669 T7d = T7b * T7c;
Chris@82 670 T7g = T7b * T7f;
Chris@82 671 T7e = W[15];
Chris@82 672 cr[WS(rs, 8)] = FNMS(T7e, T7f, T7d);
Chris@82 673 ci[WS(rs, 8)] = FMA(T7e, T7c, T7g);
Chris@82 674 }
Chris@82 675 }
/* Outputs 22 (W[42], W[43]) and 6 (W[10], W[11]). */
Chris@82 676 {
Chris@82 677 E T7I, T7Y, T7V, T81;
Chris@82 678 {
Chris@82 679 E T7s, T7H, T7R, T7U;
Chris@82 680 T7s = FMA(KP707106781, T7r, T7k);
Chris@82 681 T7H = T7z - T7G;
Chris@82 682 T7I = FNMS(KP923879532, T7H, T7s);
Chris@82 683 T7Y = FMA(KP923879532, T7H, T7s);
Chris@82 684 T7R = FMA(KP707106781, T7Q, T7N);
Chris@82 685 T7U = T7S - T7T;
Chris@82 686 T7V = FNMS(KP923879532, T7U, T7R);
Chris@82 687 T81 = FMA(KP923879532, T7U, T7R);
Chris@82 688 }
Chris@82 689 {
Chris@82 690 E T7h, T7J, T7K, T7W;
Chris@82 691 T7h = W[42];
Chris@82 692 T7J = T7h * T7I;
Chris@82 693 T7K = W[43];
Chris@82 694 T7W = T7K * T7I;
Chris@82 695 cr[WS(rs, 22)] = FNMS(T7K, T7V, T7J);
Chris@82 696 ci[WS(rs, 22)] = FMA(T7h, T7V, T7W);
Chris@82 697 }
Chris@82 698 {
Chris@82 699 E T7X, T7Z, T80, T82;
Chris@82 700 T7X = W[10];
Chris@82 701 T7Z = T7X * T7Y;
Chris@82 702 T80 = W[11];
Chris@82 703 T82 = T80 * T7Y;
Chris@82 704 cr[WS(rs, 6)] = FNMS(T80, T81, T7Z);
Chris@82 705 ci[WS(rs, 6)] = FMA(T7X, T81, T82);
Chris@82 706 }
Chris@82 707 }
/* Outputs 21 (W[40], W[41]), 5 (W[8], W[9]), 13 (W[24], W[25]) and 29 (W[56], W[57]). */
Chris@82 708 {
Chris@82 709 E T37, T2A, T38, T2W, T2T, T3c, T2Z, T34;
Chris@82 710 T37 = FNMS(KP923879532, T2O, T2L);
Chris@82 711 {
Chris@82 712 E T1G, T27, T2y, T2z;
Chris@82 713 T1G = FMA(KP923879532, T1F, T1i);
Chris@82 714 T27 = FMA(KP668178637, T26, T1X);
Chris@82 715 T2y = FNMS(KP668178637, T2x, T2o);
Chris@82 716 T2z = T27 - T2y;
Chris@82 717 T2A = FNMS(KP831469612, T2z, T1G);
Chris@82 718 T38 = T2y + T27;
Chris@82 719 T2W = FMA(KP831469612, T2z, T1G);
Chris@82 720 }
Chris@82 721 {
Chris@82 722 E T2P, T32, T2S, T33, T2Q, T2R;
Chris@82 723 T2P = FMA(KP923879532, T2O, T2L);
Chris@82 724 T32 = FNMS(KP923879532, T1F, T1i);
Chris@82 725 T2Q = FMA(KP668178637, T2o, T2x);
Chris@82 726 T2R = FNMS(KP668178637, T1X, T26);
Chris@82 727 T2S = T2Q - T2R;
Chris@82 728 T33 = T2Q + T2R;
Chris@82 729 T2T = FNMS(KP831469612, T2S, T2P);
Chris@82 730 T3c = FMA(KP831469612, T33, T32);
Chris@82 731 T2Z = FMA(KP831469612, T2S, T2P);
Chris@82 732 T34 = FNMS(KP831469612, T33, T32);
Chris@82 733 }
Chris@82 734 {
Chris@82 735 E T2B, T2U, T11, T2C;
Chris@82 736 T11 = W[40];
Chris@82 737 T2B = T11 * T2A;
Chris@82 738 T2U = T11 * T2T;
Chris@82 739 T2C = W[41];
Chris@82 740 cr[WS(rs, 21)] = FNMS(T2C, T2T, T2B);
Chris@82 741 ci[WS(rs, 21)] = FMA(T2C, T2A, T2U);
Chris@82 742 }
Chris@82 743 {
Chris@82 744 E T2X, T30, T2V, T2Y;
Chris@82 745 T2V = W[8];
Chris@82 746 T2X = T2V * T2W;
Chris@82 747 T30 = T2V * T2Z;
Chris@82 748 T2Y = W[9];
Chris@82 749 cr[WS(rs, 5)] = FNMS(T2Y, T2Z, T2X);
Chris@82 750 ci[WS(rs, 5)] = FMA(T2Y, T2W, T30);
Chris@82 751 }
Chris@82 752 {
Chris@82 753 E T39, T36, T3a, T31, T35;
Chris@82 754 T39 = FNMS(KP831469612, T38, T37);
Chris@82 755 T36 = W[25];
Chris@82 756 T3a = T36 * T34;
Chris@82 757 T31 = W[24];
Chris@82 758 T35 = T31 * T34;
Chris@82 759 cr[WS(rs, 13)] = FNMS(T36, T39, T35);
Chris@82 760 ci[WS(rs, 13)] = FMA(T31, T39, T3a);
Chris@82 761 }
Chris@82 762 {
Chris@82 763 E T3f, T3e, T3g, T3b, T3d;
Chris@82 764 T3f = FMA(KP831469612, T38, T37);
Chris@82 765 T3e = W[57];
Chris@82 766 T3g = T3e * T3c;
Chris@82 767 T3b = W[56];
Chris@82 768 T3d = T3b * T3c;
Chris@82 769 cr[WS(rs, 29)] = FNMS(T3e, T3f, T3d);
Chris@82 770 ci[WS(rs, 29)] = FMA(T3b, T3f, T3g);
Chris@82 771 }
Chris@82 772 }
/* Outputs 11 (W[20], W[21]), 27 (W[52], W[53]), 19 (W[36], W[37]) and 3 (W[4], W[5]). */
Chris@82 773 {
Chris@82 774 E T4z, T4C, T4W, T4O, T4q, T4Z, T4G, T4T;
Chris@82 775 T4z = FMA(KP923879532, T4y, T4v);
Chris@82 776 {
Chris@82 777 E T4M, T4A, T4B, T4N;
Chris@82 778 T4M = FMA(KP923879532, T49, T42);
Chris@82 779 T4A = FMA(KP668178637, T4d, T4g);
Chris@82 780 T4B = FMA(KP668178637, T4k, T4n);
Chris@82 781 T4N = T4A + T4B;
Chris@82 782 T4C = T4A - T4B;
Chris@82 783 T4W = FMA(KP831469612, T4N, T4M);
Chris@82 784 T4O = FNMS(KP831469612, T4N, T4M);
Chris@82 785 }
Chris@82 786 {
Chris@82 787 E T4a, T4R, T4p, T4S, T4h, T4o;
Chris@82 788 T4a = FNMS(KP923879532, T49, T42);
Chris@82 789 T4R = FNMS(KP923879532, T4y, T4v);
Chris@82 790 T4h = FNMS(KP668178637, T4g, T4d);
Chris@82 791 T4o = FNMS(KP668178637, T4n, T4k);
Chris@82 792 T4p = T4h + T4o;
Chris@82 793 T4S = T4h - T4o;
Chris@82 794 T4q = FNMS(KP831469612, T4p, T4a);
Chris@82 795 T4Z = FNMS(KP831469612, T4S, T4R);
Chris@82 796 T4G = FMA(KP831469612, T4p, T4a);
Chris@82 797 T4T = FMA(KP831469612, T4S, T4R);
Chris@82 798 }
Chris@82 799 {
Chris@82 800 E T4P, T4U, T4L, T4Q;
Chris@82 801 T4L = W[20];
Chris@82 802 T4P = T4L * T4O;
Chris@82 803 T4U = T4L * T4T;
Chris@82 804 T4Q = W[21];
Chris@82 805 cr[WS(rs, 11)] = FNMS(T4Q, T4T, T4P);
Chris@82 806 ci[WS(rs, 11)] = FMA(T4Q, T4O, T4U);
Chris@82 807 }
Chris@82 808 {
Chris@82 809 E T4X, T50, T4V, T4Y;
Chris@82 810 T4V = W[52];
Chris@82 811 T4X = T4V * T4W;
Chris@82 812 T50 = T4V * T4Z;
Chris@82 813 T4Y = W[53];
Chris@82 814 cr[WS(rs, 27)] = FNMS(T4Y, T4Z, T4X);
Chris@82 815 ci[WS(rs, 27)] = FMA(T4Y, T4W, T50);
Chris@82 816 }
Chris@82 817 {
Chris@82 818 E T4D, T4s, T4E, T3Z, T4r;
Chris@82 819 T4D = FNMS(KP831469612, T4C, T4z);
Chris@82 820 T4s = W[37];
Chris@82 821 T4E = T4s * T4q;
Chris@82 822 T3Z = W[36];
Chris@82 823 T4r = T3Z * T4q;
Chris@82 824 cr[WS(rs, 19)] = FNMS(T4s, T4D, T4r);
Chris@82 825 ci[WS(rs, 19)] = FMA(T3Z, T4D, T4E);
Chris@82 826 }
Chris@82 827 {
Chris@82 828 E T4J, T4I, T4K, T4F, T4H;
Chris@82 829 T4J = FMA(KP831469612, T4C, T4z);
Chris@82 830 T4I = W[5];
Chris@82 831 T4K = T4I * T4G;
Chris@82 832 T4F = W[4];
Chris@82 833 T4H = T4F * T4G;
Chris@82 834 cr[WS(rs, 3)] = FNMS(T4I, T4J, T4H);
Chris@82 835 ci[WS(rs, 3)] = FMA(T4F, T4J, T4K);
Chris@82 836 }
Chris@82 837 }
/* Outputs 25 (W[48], W[49]), 9 (W[16], W[17]), 17 (W[32], W[33]) and 1 (W[0], W[1]). */
Chris@82 838 {
Chris@82 839 E T3x, T3A, T3U, T3M, T3s, T3X, T3E, T3R;
Chris@82 840 T3x = FMA(KP923879532, T3w, T3v);
Chris@82 841 {
Chris@82 842 E T3K, T3y, T3z, T3L;
Chris@82 843 T3K = FNMS(KP923879532, T3j, T3i);
Chris@82 844 T3y = FMA(KP198912367, T3l, T3m);
Chris@82 845 T3z = FNMS(KP198912367, T3o, T3p);
Chris@82 846 T3L = T3z - T3y;
Chris@82 847 T3A = T3y + T3z;
Chris@82 848 T3U = FMA(KP980785280, T3L, T3K);
Chris@82 849 T3M = FNMS(KP980785280, T3L, T3K);
Chris@82 850 }
Chris@82 851 {
Chris@82 852 E T3k, T3P, T3r, T3Q, T3n, T3q;
Chris@82 853 T3k = FMA(KP923879532, T3j, T3i);
Chris@82 854 T3P = FNMS(KP923879532, T3w, T3v);
Chris@82 855 T3n = FNMS(KP198912367, T3m, T3l);
Chris@82 856 T3q = FMA(KP198912367, T3p, T3o);
Chris@82 857 T3r = T3n + T3q;
Chris@82 858 T3Q = T3n - T3q;
Chris@82 859 T3s = FNMS(KP980785280, T3r, T3k);
Chris@82 860 T3X = FMA(KP980785280, T3Q, T3P);
Chris@82 861 T3E = FMA(KP980785280, T3r, T3k);
Chris@82 862 T3R = FNMS(KP980785280, T3Q, T3P);
Chris@82 863 }
Chris@82 864 {
Chris@82 865 E T3N, T3S, T3J, T3O;
Chris@82 866 T3J = W[48];
Chris@82 867 T3N = T3J * T3M;
Chris@82 868 T3S = T3J * T3R;
Chris@82 869 T3O = W[49];
Chris@82 870 cr[WS(rs, 25)] = FNMS(T3O, T3R, T3N);
Chris@82 871 ci[WS(rs, 25)] = FMA(T3O, T3M, T3S);
Chris@82 872 }
Chris@82 873 {
Chris@82 874 E T3V, T3Y, T3T, T3W;
Chris@82 875 T3T = W[16];
Chris@82 876 T3V = T3T * T3U;
Chris@82 877 T3Y = T3T * T3X;
Chris@82 878 T3W = W[17];
Chris@82 879 cr[WS(rs, 9)] = FNMS(T3W, T3X, T3V);
Chris@82 880 ci[WS(rs, 9)] = FMA(T3W, T3U, T3Y);
Chris@82 881 }
Chris@82 882 {
Chris@82 883 E T3B, T3u, T3C, T3h, T3t;
Chris@82 884 T3B = FNMS(KP980785280, T3A, T3x);
Chris@82 885 T3u = W[33];
Chris@82 886 T3C = T3u * T3s;
Chris@82 887 T3h = W[32];
Chris@82 888 T3t = T3h * T3s;
Chris@82 889 cr[WS(rs, 17)] = FNMS(T3u, T3B, T3t);
Chris@82 890 ci[WS(rs, 17)] = FMA(T3h, T3B, T3C);
Chris@82 891 }
Chris@82 892 {
Chris@82 893 E T3H, T3G, T3I, T3D, T3F;
Chris@82 894 T3H = FMA(KP980785280, T3A, T3x);
Chris@82 895 T3G = W[1];
Chris@82 896 T3I = T3G * T3E;
Chris@82 897 T3D = W[0];
Chris@82 898 T3F = T3D * T3E;
Chris@82 899 cr[WS(rs, 1)] = FNMS(T3G, T3H, T3F);
Chris@82 900 ci[WS(rs, 1)] = FMA(T3D, T3H, T3I);
Chris@82 901 }
Chris@82 902 }
/* Outputs 15 (W[28], W[29]), 31 (W[60], W[61]), 23 (W[44], W[45]) and 7 (W[12], W[13]). */
Chris@82 903 {
Chris@82 904 E T5h, T5k, T5E, T5w, T5c, T5H, T5o, T5B;
Chris@82 905 T5h = FMA(KP923879532, T5g, T5f);
Chris@82 906 {
Chris@82 907 E T5u, T5i, T5j, T5v;
Chris@82 908 T5u = FMA(KP923879532, T53, T52);
Chris@82 909 T5i = FMA(KP198912367, T55, T56);
Chris@82 910 T5j = FMA(KP198912367, T58, T59);
Chris@82 911 T5v = T5i + T5j;
Chris@82 912 T5k = T5i - T5j;
Chris@82 913 T5E = FMA(KP980785280, T5v, T5u);
Chris@82 914 T5w = FNMS(KP980785280, T5v, T5u);
Chris@82 915 }
Chris@82 916 {
Chris@82 917 E T54, T5z, T5b, T5A, T57, T5a;
Chris@82 918 T54 = FNMS(KP923879532, T53, T52);
Chris@82 919 T5z = FNMS(KP923879532, T5g, T5f);
Chris@82 920 T57 = FNMS(KP198912367, T56, T55);
Chris@82 921 T5a = FNMS(KP198912367, T59, T58);
Chris@82 922 T5b = T57 + T5a;
Chris@82 923 T5A = T5a - T57;
Chris@82 924 T5c = FMA(KP980785280, T5b, T54);
Chris@82 925 T5H = FNMS(KP980785280, T5A, T5z);
Chris@82 926 T5o = FNMS(KP980785280, T5b, T54);
Chris@82 927 T5B = FMA(KP980785280, T5A, T5z);
Chris@82 928 }
Chris@82 929 {
Chris@82 930 E T5x, T5C, T5t, T5y;
Chris@82 931 T5t = W[28];
Chris@82 932 T5x = T5t * T5w;
Chris@82 933 T5C = T5t * T5B;
Chris@82 934 T5y = W[29];
Chris@82 935 cr[WS(rs, 15)] = FNMS(T5y, T5B, T5x);
Chris@82 936 ci[WS(rs, 15)] = FMA(T5y, T5w, T5C);
Chris@82 937 }
Chris@82 938 {
Chris@82 939 E T5F, T5I, T5D, T5G;
Chris@82 940 T5D = W[60];
Chris@82 941 T5F = T5D * T5E;
Chris@82 942 T5I = T5D * T5H;
Chris@82 943 T5G = W[61];
Chris@82 944 cr[WS(rs, 31)] = FNMS(T5G, T5H, T5F);
Chris@82 945 ci[WS(rs, 31)] = FMA(T5G, T5E, T5I);
Chris@82 946 }
Chris@82 947 {
Chris@82 948 E T5l, T5e, T5m, T51, T5d;
Chris@82 949 T5l = FNMS(KP980785280, T5k, T5h);
Chris@82 950 T5e = W[45];
Chris@82 951 T5m = T5e * T5c;
Chris@82 952 T51 = W[44];
Chris@82 953 T5d = T51 * T5c;
Chris@82 954 cr[WS(rs, 23)] = FNMS(T5e, T5l, T5d);
Chris@82 955 ci[WS(rs, 23)] = FMA(T51, T5l, T5m);
Chris@82 956 }
Chris@82 957 {
Chris@82 958 E T5r, T5q, T5s, T5n, T5p;
Chris@82 959 T5r = FMA(KP980785280, T5k, T5h);
Chris@82 960 T5q = W[13];
Chris@82 961 T5s = T5q * T5o;
Chris@82 962 T5n = W[12];
Chris@82 963 T5p = T5n * T5o;
Chris@82 964 cr[WS(rs, 7)] = FNMS(T5q, T5r, T5p);
Chris@82 965 ci[WS(rs, 7)] = FMA(T5n, T5r, T5s);
Chris@82 966 }
Chris@82 967 }
Chris@82 968 }
Chris@82 969 }
Chris@82 970 }
Chris@82 971
Chris@82 972 static const tw_instr twinstr[] = { /* multiplier-table layout handed to the planner through desc below */
Chris@82 973 {TW_FULL, 1, 32}, /* presumably: full twiddle set for n = 32, i.e. the 31 pairs (62 reals) hb_32 reads from W -- confirm against the tw_instr definition */
Chris@82 974 {TW_NEXT, 1, 0} /* presumably the end-of-list / advance marker -- confirm */
Chris@82 975 };
Chris@82 976
Chris@82 977 static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {236, 62, 198, 0} }; /* 32 matches the generator's -n 32; {236, 62, 198, ...} are the add / mul / fused multiply-add counts quoted in this variant's generator comment; meaning of the trailing 0 is not visible here */
Chris@82 978
Chris@82 979 void X(codelet_hb_32) (planner *p) { /* entry point: hands the FMA-variant hb_32 and its descriptor to planner p */
Chris@82 980 X(khc2hc_register) (p, hb_32, &desc); /* X(...) is a project macro wrapping the identifier (presumably a name prefix -- confirm) */
Chris@82 981 }
Chris@82 982 #else
Chris@82 983
Chris@82 984 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include rdft/scalar/hb.h */
Chris@82 985
Chris@82 986 /*
Chris@82 987 * This function contains 434 FP additions, 208 FP multiplications,
Chris@82 988 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@82 989 * 98 stack variables, 7 constants, and 128 memory accesses
Chris@82 990 */
Chris@82 991 #include "rdft/scalar/hb.h"
Chris@82 992
Chris@82 993 static void hb_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* size-32 codelet emitted by gen_hc2hc (-sign 1, -dif, non-FMA variant); rewrites cr[]/ci[] in place */
Chris@82 994 { /* cr, ci: data arrays indexed through WS(rs, k), k = 0..31; W: 62 multipliers (31 pairs) per iteration; m runs over [mb, me); ms: per-iteration pointer step */
Chris@82 995 DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
Chris@82 996 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
Chris@82 997 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
Chris@82 998 DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
Chris@82 999 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
Chris@82 1000 DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
Chris@82 1001 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(1/2) */
Chris@82 1002 {
Chris@82 1003 INT m;
Chris@82 1004 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) { /* W starts (mb - 1) * 62 entries in; each pass moves cr forward and ci backward by ms and W forward by 62 */
Chris@82 1005 E T4o, T6y, T70, T5u, Tf, T12, T5x, T6z, T3m, T3Y, T29, T2y, T4v, T71, T2U;
Chris@82 1006 E T3M, Tu, T1U, T6D, T73, T6G, T74, T1h, T2z, T2X, T3o, T4D, T5A, T4K, T5z;
Chris@82 1007 E T30, T3n, TK, T1j, T6S, T7w, T6V, T7v, T1y, T2B, T3c, T3S, T4X, T61, T54;
Chris@82 1008 E T62, T3f, T3T, TZ, T1A, T6L, T7z, T6O, T7y, T1P, T2C, T35, T3P, T5g, T64;
Chris@82 1009 E T38, T3Q;
Chris@82 1010 { /* load group 1: cr[0, 4, ..., 28] and ci[3, 7, ..., 31]; all four load groups finish before any output is stored */
Chris@82 1011 E T3, T4m, T24, T4q, T27, T4t, T6, T5s, Ta, T4p, T1X, T5t, T20, T4n, Td;
Chris@82 1012 E T4s;
Chris@82 1013 {
Chris@82 1014 E T1, T2, T22, T23;
Chris@82 1015 T1 = cr[0];
Chris@82 1016 T2 = ci[WS(rs, 15)];
Chris@82 1017 T3 = T1 + T2;
Chris@82 1018 T4m = T1 - T2;
Chris@82 1019 T22 = ci[WS(rs, 27)];
Chris@82 1020 T23 = cr[WS(rs, 20)];
Chris@82 1021 T24 = T22 - T23;
Chris@82 1022 T4q = T22 + T23;
Chris@82 1023 }
Chris@82 1024 {
Chris@82 1025 E T25, T26, T4, T5;
Chris@82 1026 T25 = ci[WS(rs, 19)];
Chris@82 1027 T26 = cr[WS(rs, 28)];
Chris@82 1028 T27 = T25 - T26;
Chris@82 1029 T4t = T25 + T26;
Chris@82 1030 T4 = cr[WS(rs, 8)];
Chris@82 1031 T5 = ci[WS(rs, 7)];
Chris@82 1032 T6 = T4 + T5;
Chris@82 1033 T5s = T4 - T5;
Chris@82 1034 }
Chris@82 1035 {
Chris@82 1036 E T8, T9, T1V, T1W;
Chris@82 1037 T8 = cr[WS(rs, 4)];
Chris@82 1038 T9 = ci[WS(rs, 11)];
Chris@82 1039 Ta = T8 + T9;
Chris@82 1040 T4p = T8 - T9;
Chris@82 1041 T1V = ci[WS(rs, 31)];
Chris@82 1042 T1W = cr[WS(rs, 16)];
Chris@82 1043 T1X = T1V - T1W;
Chris@82 1044 T5t = T1V + T1W;
Chris@82 1045 }
Chris@82 1046 {
Chris@82 1047 E T1Y, T1Z, Tb, Tc;
Chris@82 1048 T1Y = ci[WS(rs, 23)];
Chris@82 1049 T1Z = cr[WS(rs, 24)];
Chris@82 1050 T20 = T1Y - T1Z;
Chris@82 1051 T4n = T1Y + T1Z;
Chris@82 1052 Tb = ci[WS(rs, 3)];
Chris@82 1053 Tc = cr[WS(rs, 12)];
Chris@82 1054 Td = Tb + Tc;
Chris@82 1055 T4s = Tb - Tc;
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T7, Te, T21, T28;
Chris@82 1059 T4o = T4m - T4n;
Chris@82 1060 T6y = T4m + T4n;
Chris@82 1061 T70 = T5t - T5s;
Chris@82 1062 T5u = T5s + T5t;
Chris@82 1063 T7 = T3 + T6;
Chris@82 1064 Te = Ta + Td;
Chris@82 1065 Tf = T7 + Te;
Chris@82 1066 T12 = T7 - Te;
Chris@82 1067 {
Chris@82 1068 E T5v, T5w, T3k, T3l;
Chris@82 1069 T5v = T4p + T4q;
Chris@82 1070 T5w = T4s + T4t;
Chris@82 1071 T5x = KP707106781 * (T5v - T5w);
Chris@82 1072 T6z = KP707106781 * (T5v + T5w);
Chris@82 1073 T3k = T1X - T20;
Chris@82 1074 T3l = Ta - Td;
Chris@82 1075 T3m = T3k - T3l;
Chris@82 1076 T3Y = T3l + T3k;
Chris@82 1077 }
Chris@82 1078 T21 = T1X + T20;
Chris@82 1079 T28 = T24 + T27;
Chris@82 1080 T29 = T21 - T28;
Chris@82 1081 T2y = T21 + T28;
Chris@82 1082 {
Chris@82 1083 E T4r, T4u, T2S, T2T;
Chris@82 1084 T4r = T4p - T4q;
Chris@82 1085 T4u = T4s - T4t;
Chris@82 1086 T4v = KP707106781 * (T4r + T4u);
Chris@82 1087 T71 = KP707106781 * (T4r - T4u);
Chris@82 1088 T2S = T3 - T6;
Chris@82 1089 T2T = T27 - T24;
Chris@82 1090 T2U = T2S - T2T;
Chris@82 1091 T3M = T2S + T2T;
Chris@82 1092 }
Chris@82 1093 }
Chris@82 1094 }
Chris@82 1095 { /* load group 2: cr[2, 6, ..., 30] and ci[1, 5, ..., 29] */
Chris@82 1096 E Ti, T4H, T1c, T4F, T1f, T4I, Tl, T4E, Tp, T4A, T15, T4y, T18, T4B, Ts;
Chris@82 1097 E T4x;
Chris@82 1098 {
Chris@82 1099 E Tg, Th, T1a, T1b;
Chris@82 1100 Tg = cr[WS(rs, 2)];
Chris@82 1101 Th = ci[WS(rs, 13)];
Chris@82 1102 Ti = Tg + Th;
Chris@82 1103 T4H = Tg - Th;
Chris@82 1104 T1a = ci[WS(rs, 29)];
Chris@82 1105 T1b = cr[WS(rs, 18)];
Chris@82 1106 T1c = T1a - T1b;
Chris@82 1107 T4F = T1a + T1b;
Chris@82 1108 }
Chris@82 1109 {
Chris@82 1110 E T1d, T1e, Tj, Tk;
Chris@82 1111 T1d = ci[WS(rs, 21)];
Chris@82 1112 T1e = cr[WS(rs, 26)];
Chris@82 1113 T1f = T1d - T1e;
Chris@82 1114 T4I = T1d + T1e;
Chris@82 1115 Tj = cr[WS(rs, 10)];
Chris@82 1116 Tk = ci[WS(rs, 5)];
Chris@82 1117 Tl = Tj + Tk;
Chris@82 1118 T4E = Tj - Tk;
Chris@82 1119 }
Chris@82 1120 {
Chris@82 1121 E Tn, To, T13, T14;
Chris@82 1122 Tn = ci[WS(rs, 1)];
Chris@82 1123 To = cr[WS(rs, 14)];
Chris@82 1124 Tp = Tn + To;
Chris@82 1125 T4A = Tn - To;
Chris@82 1126 T13 = ci[WS(rs, 17)];
Chris@82 1127 T14 = cr[WS(rs, 30)];
Chris@82 1128 T15 = T13 - T14;
Chris@82 1129 T4y = T13 + T14;
Chris@82 1130 }
Chris@82 1131 {
Chris@82 1132 E T16, T17, Tq, Tr;
Chris@82 1133 T16 = ci[WS(rs, 25)];
Chris@82 1134 T17 = cr[WS(rs, 22)];
Chris@82 1135 T18 = T16 - T17;
Chris@82 1136 T4B = T16 + T17;
Chris@82 1137 Tq = cr[WS(rs, 6)];
Chris@82 1138 Tr = ci[WS(rs, 9)];
Chris@82 1139 Ts = Tq + Tr;
Chris@82 1140 T4x = Tq - Tr;
Chris@82 1141 }
Chris@82 1142 {
Chris@82 1143 E Tm, Tt, T6B, T6C;
Chris@82 1144 Tm = Ti + Tl;
Chris@82 1145 Tt = Tp + Ts;
Chris@82 1146 Tu = Tm + Tt;
Chris@82 1147 T1U = Tm - Tt;
Chris@82 1148 T6B = T4H + T4I;
Chris@82 1149 T6C = T4F - T4E;
Chris@82 1150 T6D = FNMS(KP923879532, T6C, KP382683432 * T6B);
Chris@82 1151 T73 = FMA(KP382683432, T6C, KP923879532 * T6B);
Chris@82 1152 }
Chris@82 1153 {
Chris@82 1154 E T6E, T6F, T19, T1g;
Chris@82 1155 T6E = T4A + T4B;
Chris@82 1156 T6F = T4x + T4y;
Chris@82 1157 T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);
Chris@82 1158 T74 = FMA(KP382683432, T6F, KP923879532 * T6E);
Chris@82 1159 T19 = T15 + T18;
Chris@82 1160 T1g = T1c + T1f;
Chris@82 1161 T1h = T19 - T1g;
Chris@82 1162 T2z = T1g + T19;
Chris@82 1163 }
Chris@82 1164 {
Chris@82 1165 E T2V, T2W, T4z, T4C;
Chris@82 1166 T2V = T15 - T18;
Chris@82 1167 T2W = Tp - Ts;
Chris@82 1168 T2X = T2V - T2W;
Chris@82 1169 T3o = T2W + T2V;
Chris@82 1170 T4z = T4x - T4y;
Chris@82 1171 T4C = T4A - T4B;
Chris@82 1172 T4D = FNMS(KP382683432, T4C, KP923879532 * T4z);
Chris@82 1173 T5A = FMA(KP382683432, T4z, KP923879532 * T4C);
Chris@82 1174 }
Chris@82 1175 {
Chris@82 1176 E T4G, T4J, T2Y, T2Z;
Chris@82 1177 T4G = T4E + T4F;
Chris@82 1178 T4J = T4H - T4I;
Chris@82 1179 T4K = FMA(KP923879532, T4G, KP382683432 * T4J);
Chris@82 1180 T5z = FNMS(KP382683432, T4G, KP923879532 * T4J);
Chris@82 1181 T2Y = Ti - Tl;
Chris@82 1182 T2Z = T1c - T1f;
Chris@82 1183 T30 = T2Y + T2Z;
Chris@82 1184 T3n = T2Y - T2Z;
Chris@82 1185 }
Chris@82 1186 }
Chris@82 1187 { /* load group 3: cr[1, 5, ..., 29] and ci[2, 6, ..., 30] */
Chris@82 1188 E Ty, T4N, TB, T4Y, T1p, T4O, T1m, T4Z, TI, T52, T1w, T4V, TF, T51, T1t;
Chris@82 1189 E T4S;
Chris@82 1190 {
Chris@82 1191 E Tw, Tx, T1k, T1l;
Chris@82 1192 Tw = cr[WS(rs, 1)];
Chris@82 1193 Tx = ci[WS(rs, 14)];
Chris@82 1194 Ty = Tw + Tx;
Chris@82 1195 T4N = Tw - Tx;
Chris@82 1196 {
Chris@82 1197 E Tz, TA, T1n, T1o;
Chris@82 1198 Tz = cr[WS(rs, 9)];
Chris@82 1199 TA = ci[WS(rs, 6)];
Chris@82 1200 TB = Tz + TA;
Chris@82 1201 T4Y = Tz - TA;
Chris@82 1202 T1n = ci[WS(rs, 22)];
Chris@82 1203 T1o = cr[WS(rs, 25)];
Chris@82 1204 T1p = T1n - T1o;
Chris@82 1205 T4O = T1n + T1o;
Chris@82 1206 }
Chris@82 1207 T1k = ci[WS(rs, 30)];
Chris@82 1208 T1l = cr[WS(rs, 17)];
Chris@82 1209 T1m = T1k - T1l;
Chris@82 1210 T4Z = T1k + T1l;
Chris@82 1211 {
Chris@82 1212 E TG, TH, T4T, T1u, T1v, T4U;
Chris@82 1213 TG = ci[WS(rs, 2)];
Chris@82 1214 TH = cr[WS(rs, 13)];
Chris@82 1215 T4T = TG - TH;
Chris@82 1216 T1u = ci[WS(rs, 18)];
Chris@82 1217 T1v = cr[WS(rs, 29)];
Chris@82 1218 T4U = T1u + T1v;
Chris@82 1219 TI = TG + TH;
Chris@82 1220 T52 = T4T + T4U;
Chris@82 1221 T1w = T1u - T1v;
Chris@82 1222 T4V = T4T - T4U;
Chris@82 1223 }
Chris@82 1224 {
Chris@82 1225 E TD, TE, T4Q, T1r, T1s, T4R;
Chris@82 1226 TD = cr[WS(rs, 5)];
Chris@82 1227 TE = ci[WS(rs, 10)];
Chris@82 1228 T4Q = TD - TE;
Chris@82 1229 T1r = ci[WS(rs, 26)];
Chris@82 1230 T1s = cr[WS(rs, 21)];
Chris@82 1231 T4R = T1r + T1s;
Chris@82 1232 TF = TD + TE;
Chris@82 1233 T51 = T4Q + T4R;
Chris@82 1234 T1t = T1r - T1s;
Chris@82 1235 T4S = T4Q - T4R;
Chris@82 1236 }
Chris@82 1237 }
Chris@82 1238 {
Chris@82 1239 E TC, TJ, T6Q, T6R;
Chris@82 1240 TC = Ty + TB;
Chris@82 1241 TJ = TF + TI;
Chris@82 1242 TK = TC + TJ;
Chris@82 1243 T1j = TC - TJ;
Chris@82 1244 T6Q = T4Z - T4Y;
Chris@82 1245 T6R = KP707106781 * (T4S - T4V);
Chris@82 1246 T6S = T6Q + T6R;
Chris@82 1247 T7w = T6Q - T6R;
Chris@82 1248 }
Chris@82 1249 {
Chris@82 1250 E T6T, T6U, T1q, T1x;
Chris@82 1251 T6T = T4N + T4O;
Chris@82 1252 T6U = KP707106781 * (T51 + T52);
Chris@82 1253 T6V = T6T - T6U;
Chris@82 1254 T7v = T6T + T6U;
Chris@82 1255 T1q = T1m + T1p;
Chris@82 1256 T1x = T1t + T1w;
Chris@82 1257 T1y = T1q - T1x;
Chris@82 1258 T2B = T1q + T1x;
Chris@82 1259 }
Chris@82 1260 {
Chris@82 1261 E T3a, T3b, T4P, T4W;
Chris@82 1262 T3a = T1m - T1p;
Chris@82 1263 T3b = TF - TI;
Chris@82 1264 T3c = T3a - T3b;
Chris@82 1265 T3S = T3b + T3a;
Chris@82 1266 T4P = T4N - T4O;
Chris@82 1267 T4W = KP707106781 * (T4S + T4V);
Chris@82 1268 T4X = T4P - T4W;
Chris@82 1269 T61 = T4P + T4W;
Chris@82 1270 }
Chris@82 1271 {
Chris@82 1272 E T50, T53, T3d, T3e;
Chris@82 1273 T50 = T4Y + T4Z;
Chris@82 1274 T53 = KP707106781 * (T51 - T52);
Chris@82 1275 T54 = T50 - T53;
Chris@82 1276 T62 = T50 + T53;
Chris@82 1277 T3d = Ty - TB;
Chris@82 1278 T3e = T1w - T1t;
Chris@82 1279 T3f = T3d - T3e;
Chris@82 1280 T3T = T3d + T3e;
Chris@82 1281 }
Chris@82 1282 }
Chris@82 1283 { /* load group 4: cr[3, 7, ..., 31] and ci[0, 4, ..., 28] */
Chris@82 1284 E TN, T56, TQ, T5h, T1G, T57, T1D, T5i, TX, T5l, T1N, T5e, TU, T5k, T1K;
Chris@82 1285 E T5b;
Chris@82 1286 {
Chris@82 1287 E TL, TM, T1B, T1C;
Chris@82 1288 TL = ci[0];
Chris@82 1289 TM = cr[WS(rs, 15)];
Chris@82 1290 TN = TL + TM;
Chris@82 1291 T56 = TL - TM;
Chris@82 1292 {
Chris@82 1293 E TO, TP, T1E, T1F;
Chris@82 1294 TO = cr[WS(rs, 7)];
Chris@82 1295 TP = ci[WS(rs, 8)];
Chris@82 1296 TQ = TO + TP;
Chris@82 1297 T5h = TO - TP;
Chris@82 1298 T1E = ci[WS(rs, 24)];
Chris@82 1299 T1F = cr[WS(rs, 23)];
Chris@82 1300 T1G = T1E - T1F;
Chris@82 1301 T57 = T1E + T1F;
Chris@82 1302 }
Chris@82 1303 T1B = ci[WS(rs, 16)];
Chris@82 1304 T1C = cr[WS(rs, 31)];
Chris@82 1305 T1D = T1B - T1C;
Chris@82 1306 T5i = T1B + T1C;
Chris@82 1307 {
Chris@82 1308 E TV, TW, T5c, T1L, T1M, T5d;
Chris@82 1309 TV = ci[WS(rs, 4)];
Chris@82 1310 TW = cr[WS(rs, 11)];
Chris@82 1311 T5c = TV - TW;
Chris@82 1312 T1L = ci[WS(rs, 20)];
Chris@82 1313 T1M = cr[WS(rs, 27)];
Chris@82 1314 T5d = T1L + T1M;
Chris@82 1315 TX = TV + TW;
Chris@82 1316 T5l = T5c + T5d;
Chris@82 1317 T1N = T1L - T1M;
Chris@82 1318 T5e = T5c - T5d;
Chris@82 1319 }
Chris@82 1320 {
Chris@82 1321 E TS, TT, T59, T1I, T1J, T5a;
Chris@82 1322 TS = cr[WS(rs, 3)];
Chris@82 1323 TT = ci[WS(rs, 12)];
Chris@82 1324 T59 = TS - TT;
Chris@82 1325 T1I = ci[WS(rs, 28)];
Chris@82 1326 T1J = cr[WS(rs, 19)];
Chris@82 1327 T5a = T1I + T1J;
Chris@82 1328 TU = TS + TT;
Chris@82 1329 T5k = T59 + T5a;
Chris@82 1330 T1K = T1I - T1J;
Chris@82 1331 T5b = T59 - T5a;
Chris@82 1332 }
Chris@82 1333 }
Chris@82 1334 {
Chris@82 1335 E TR, TY, T6J, T6K;
Chris@82 1336 TR = TN + TQ;
Chris@82 1337 TY = TU + TX;
Chris@82 1338 TZ = TR + TY;
Chris@82 1339 T1A = TR - TY;
Chris@82 1340 T6J = KP707106781 * (T5b - T5e);
Chris@82 1341 T6K = T5h + T5i;
Chris@82 1342 T6L = T6J - T6K;
Chris@82 1343 T7z = T6K + T6J;
Chris@82 1344 }
Chris@82 1345 {
Chris@82 1346 E T6M, T6N, T1H, T1O;
Chris@82 1347 T6M = T56 + T57;
Chris@82 1348 T6N = KP707106781 * (T5k + T5l);
Chris@82 1349 T6O = T6M - T6N;
Chris@82 1350 T7y = T6M + T6N;
Chris@82 1351 T1H = T1D + T1G;
Chris@82 1352 T1O = T1K + T1N;
Chris@82 1353 T1P = T1H - T1O;
Chris@82 1354 T2C = T1H + T1O;
Chris@82 1355 }
Chris@82 1356 {
Chris@82 1357 E T33, T34, T58, T5f;
Chris@82 1358 T33 = T1D - T1G;
Chris@82 1359 T34 = TU - TX;
Chris@82 1360 T35 = T33 - T34;
Chris@82 1361 T3P = T34 + T33;
Chris@82 1362 T58 = T56 - T57;
Chris@82 1363 T5f = KP707106781 * (T5b + T5e);
Chris@82 1364 T5g = T58 - T5f;
Chris@82 1365 T64 = T58 + T5f;
Chris@82 1366 }
Chris@82 1367 {
Chris@82 1368 E T5j, T5m, T36, T37;
Chris@82 1369 T5j = T5h - T5i;
Chris@82 1370 T5m = KP707106781 * (T5k - T5l);
Chris@82 1371 T5n = T5j - T5m;
Chris@82 1372 T65 = T5j + T5m;
Chris@82 1373 T36 = TN - TQ;
Chris@82 1374 T37 = T1N - T1K;
Chris@82 1375 T38 = T36 - T37;
Chris@82 1376 T3Q = T36 + T37;
Chris@82 1377 }
Chris@82 1378 }
Chris@82 1379 { /* outputs 0 and 16 */
Chris@82 1380 E Tv, T10, T2w, T2A, T2D, T2E, T2v, T2x;
Chris@82 1381 Tv = Tf + Tu;
Chris@82 1382 T10 = TK + TZ;
Chris@82 1383 T2w = Tv - T10;
Chris@82 1384 T2A = T2y + T2z;
Chris@82 1385 T2D = T2B + T2C;
Chris@82 1386 T2E = T2A - T2D;
Chris@82 1387 cr[0] = Tv + T10; /* index 0 is the only output stored without a W multiply */
Chris@82 1388 ci[0] = T2A + T2D;
Chris@82 1389 T2v = W[30]; /* every output k >= 1 uses the pair W[2k - 2], W[2k - 1] */
Chris@82 1390 T2x = W[31];
Chris@82 1391 cr[WS(rs, 16)] = FNMS(T2x, T2E, T2v * T2w); /* FMA / FNMS are project macros; presumably a*b + c and c - a*b, making each cr/ci pair a complex product with (W[2k-2], W[2k-1]) -- confirm */
Chris@82 1392 ci[WS(rs, 16)] = FMA(T2x, T2w, T2v * T2E);
Chris@82 1393 }
Chris@82 1394 { /* outputs 24 and 8 */
Chris@82 1395 E T2I, T2O, T2M, T2Q;
Chris@82 1396 {
Chris@82 1397 E T2G, T2H, T2K, T2L;
Chris@82 1398 T2G = Tf - Tu;
Chris@82 1399 T2H = T2C - T2B;
Chris@82 1400 T2I = T2G - T2H;
Chris@82 1401 T2O = T2G + T2H;
Chris@82 1402 T2K = T2y - T2z;
Chris@82 1403 T2L = TK - TZ;
Chris@82 1404 T2M = T2K - T2L;
Chris@82 1405 T2Q = T2L + T2K;
Chris@82 1406 }
Chris@82 1407 {
Chris@82 1408 E T2F, T2J, T2N, T2P;
Chris@82 1409 T2F = W[46];
Chris@82 1410 T2J = W[47];
Chris@82 1411 cr[WS(rs, 24)] = FNMS(T2J, T2M, T2F * T2I);
Chris@82 1412 ci[WS(rs, 24)] = FMA(T2F, T2M, T2J * T2I);
Chris@82 1413 T2N = W[14];
Chris@82 1414 T2P = W[15];
Chris@82 1415 cr[WS(rs, 8)] = FNMS(T2P, T2Q, T2N * T2O);
Chris@82 1416 ci[WS(rs, 8)] = FMA(T2N, T2Q, T2P * T2O);
Chris@82 1417 }
Chris@82 1418 }
Chris@82 1419 { /* outputs 20, 12, 4 and 28 */
Chris@82 1420 E T1i, T2a, T2o, T2k, T2d, T2l, T1R, T2p;
Chris@82 1421 T1i = T12 + T1h;
Chris@82 1422 T2a = T1U + T29;
Chris@82 1423 T2o = T29 - T1U;
Chris@82 1424 T2k = T12 - T1h;
Chris@82 1425 {
Chris@82 1426 E T2b, T2c, T1z, T1Q;
Chris@82 1427 T2b = T1j + T1y;
Chris@82 1428 T2c = T1P - T1A;
Chris@82 1429 T2d = KP707106781 * (T2b + T2c);
Chris@82 1430 T2l = KP707106781 * (T2c - T2b);
Chris@82 1431 T1z = T1j - T1y;
Chris@82 1432 T1Q = T1A + T1P;
Chris@82 1433 T1R = KP707106781 * (T1z + T1Q);
Chris@82 1434 T2p = KP707106781 * (T1z - T1Q);
Chris@82 1435 }
Chris@82 1436 {
Chris@82 1437 E T1S, T2e, T11, T1T;
Chris@82 1438 T1S = T1i - T1R;
Chris@82 1439 T2e = T2a - T2d;
Chris@82 1440 T11 = W[38];
Chris@82 1441 T1T = W[39];
Chris@82 1442 cr[WS(rs, 20)] = FNMS(T1T, T2e, T11 * T1S);
Chris@82 1443 ci[WS(rs, 20)] = FMA(T1T, T1S, T11 * T2e);
Chris@82 1444 }
Chris@82 1445 {
Chris@82 1446 E T2s, T2u, T2r, T2t;
Chris@82 1447 T2s = T2k + T2l;
Chris@82 1448 T2u = T2o + T2p;
Chris@82 1449 T2r = W[22];
Chris@82 1450 T2t = W[23];
Chris@82 1451 cr[WS(rs, 12)] = FNMS(T2t, T2u, T2r * T2s);
Chris@82 1452 ci[WS(rs, 12)] = FMA(T2r, T2u, T2t * T2s);
Chris@82 1453 }
Chris@82 1454 {
Chris@82 1455 E T2g, T2i, T2f, T2h;
Chris@82 1456 T2g = T1i + T1R;
Chris@82 1457 T2i = T2a + T2d;
Chris@82 1458 T2f = W[6];
Chris@82 1459 T2h = W[7];
Chris@82 1460 cr[WS(rs, 4)] = FNMS(T2h, T2i, T2f * T2g);
Chris@82 1461 ci[WS(rs, 4)] = FMA(T2h, T2g, T2f * T2i);
Chris@82 1462 }
Chris@82 1463 {
Chris@82 1464 E T2m, T2q, T2j, T2n;
Chris@82 1465 T2m = T2k - T2l;
Chris@82 1466 T2q = T2o - T2p;
Chris@82 1467 T2j = W[54];
Chris@82 1468 T2n = W[55];
Chris@82 1469 cr[WS(rs, 28)] = FNMS(T2n, T2q, T2j * T2m);
Chris@82 1470 ci[WS(rs, 28)] = FMA(T2j, T2q, T2n * T2m);
Chris@82 1471 }
Chris@82 1472 }
Chris@82 1473 { /* outputs 26, 2, 10 and 18 */
Chris@82 1474 E T3O, T4a, T40, T4e, T3V, T4f, T43, T4b, T3N, T3Z;
Chris@82 1475 T3N = KP707106781 * (T3n + T3o);
Chris@82 1476 T3O = T3M - T3N;
Chris@82 1477 T4a = T3M + T3N;
Chris@82 1478 T3Z = KP707106781 * (T30 + T2X);
Chris@82 1479 T40 = T3Y - T3Z;
Chris@82 1480 T4e = T3Y + T3Z;
Chris@82 1481 {
Chris@82 1482 E T3R, T3U, T41, T42;
Chris@82 1483 T3R = FNMS(KP382683432, T3Q, KP923879532 * T3P);
Chris@82 1484 T3U = FMA(KP923879532, T3S, KP382683432 * T3T);
Chris@82 1485 T3V = T3R - T3U;
Chris@82 1486 T4f = T3U + T3R;
Chris@82 1487 T41 = FNMS(KP382683432, T3S, KP923879532 * T3T);
Chris@82 1488 T42 = FMA(KP382683432, T3P, KP923879532 * T3Q);
Chris@82 1489 T43 = T41 - T42;
Chris@82 1490 T4b = T41 + T42;
Chris@82 1491 }
Chris@82 1492 {
Chris@82 1493 E T3W, T44, T3L, T3X;
Chris@82 1494 T3W = T3O - T3V;
Chris@82 1495 T44 = T40 - T43;
Chris@82 1496 T3L = W[50];
Chris@82 1497 T3X = W[51];
Chris@82 1498 cr[WS(rs, 26)] = FNMS(T3X, T44, T3L * T3W);
Chris@82 1499 ci[WS(rs, 26)] = FMA(T3X, T3W, T3L * T44);
Chris@82 1500 }
Chris@82 1501 {
Chris@82 1502 E T4i, T4k, T4h, T4j;
Chris@82 1503 T4i = T4a + T4b;
Chris@82 1504 T4k = T4e + T4f;
Chris@82 1505 T4h = W[2];
Chris@82 1506 T4j = W[3];
Chris@82 1507 cr[WS(rs, 2)] = FNMS(T4j, T4k, T4h * T4i);
Chris@82 1508 ci[WS(rs, 2)] = FMA(T4h, T4k, T4j * T4i);
Chris@82 1509 }
Chris@82 1510 {
Chris@82 1511 E T46, T48, T45, T47;
Chris@82 1512 T46 = T3O + T3V;
Chris@82 1513 T48 = T40 + T43;
Chris@82 1514 T45 = W[18];
Chris@82 1515 T47 = W[19];
Chris@82 1516 cr[WS(rs, 10)] = FNMS(T47, T48, T45 * T46);
Chris@82 1517 ci[WS(rs, 10)] = FMA(T47, T46, T45 * T48);
Chris@82 1518 }
Chris@82 1519 {
Chris@82 1520 E T4c, T4g, T49, T4d;
Chris@82 1521 T4c = T4a - T4b;
Chris@82 1522 T4g = T4e - T4f;
Chris@82 1523 T49 = W[34];
Chris@82 1524 T4d = W[35];
Chris@82 1525 cr[WS(rs, 18)] = FNMS(T4d, T4g, T49 * T4c);
Chris@82 1526 ci[WS(rs, 18)] = FMA(T49, T4g, T4d * T4c);
Chris@82 1527 }
Chris@82 1528 }
Chris@82 1529 { /* outputs 30, 6, 14 and 22 */
Chris@82 1530 E T32, T3A, T3q, T3E, T3h, T3F, T3t, T3B, T31, T3p;
Chris@82 1531 T31 = KP707106781 * (T2X - T30);
Chris@82 1532 T32 = T2U - T31;
Chris@82 1533 T3A = T2U + T31;
Chris@82 1534 T3p = KP707106781 * (T3n - T3o);
Chris@82 1535 T3q = T3m - T3p;
Chris@82 1536 T3E = T3m + T3p;
Chris@82 1537 {
Chris@82 1538 E T39, T3g, T3r, T3s;
Chris@82 1539 T39 = FNMS(KP923879532, T38, KP382683432 * T35);
Chris@82 1540 T3g = FMA(KP382683432, T3c, KP923879532 * T3f);
Chris@82 1541 T3h = T39 - T3g;
Chris@82 1542 T3F = T3g + T39;
Chris@82 1543 T3r = FNMS(KP923879532, T3c, KP382683432 * T3f);
Chris@82 1544 T3s = FMA(KP923879532, T35, KP382683432 * T38);
Chris@82 1545 T3t = T3r - T3s;
Chris@82 1546 T3B = T3r + T3s;
Chris@82 1547 }
Chris@82 1548 {
Chris@82 1549 E T3i, T3u, T2R, T3j;
Chris@82 1550 T3i = T32 - T3h;
Chris@82 1551 T3u = T3q - T3t;
Chris@82 1552 T2R = W[58];
Chris@82 1553 T3j = W[59];
Chris@82 1554 cr[WS(rs, 30)] = FNMS(T3j, T3u, T2R * T3i);
Chris@82 1555 ci[WS(rs, 30)] = FMA(T3j, T3i, T2R * T3u);
Chris@82 1556 }
Chris@82 1557 {
Chris@82 1558 E T3I, T3K, T3H, T3J;
Chris@82 1559 T3I = T3A + T3B;
Chris@82 1560 T3K = T3E + T3F;
Chris@82 1561 T3H = W[10];
Chris@82 1562 T3J = W[11];
Chris@82 1563 cr[WS(rs, 6)] = FNMS(T3J, T3K, T3H * T3I);
Chris@82 1564 ci[WS(rs, 6)] = FMA(T3H, T3K, T3J * T3I);
Chris@82 1565 }
Chris@82 1566 {
Chris@82 1567 E T3w, T3y, T3v, T3x;
Chris@82 1568 T3w = T32 + T3h;
Chris@82 1569 T3y = T3q + T3t;
Chris@82 1570 T3v = W[26];
Chris@82 1571 T3x = W[27];
Chris@82 1572 cr[WS(rs, 14)] = FNMS(T3x, T3y, T3v * T3w);
Chris@82 1573 ci[WS(rs, 14)] = FMA(T3x, T3w, T3v * T3y);
Chris@82 1574 }
Chris@82 1575 {
Chris@82 1576 E T3C, T3G, T3z, T3D;
Chris@82 1577 T3C = T3A - T3B;
Chris@82 1578 T3G = T3E - T3F;
Chris@82 1579 T3z = W[42];
Chris@82 1580 T3D = W[43];
Chris@82 1581 cr[WS(rs, 22)] = FNMS(T3D, T3G, T3z * T3C);
Chris@82 1582 ci[WS(rs, 22)] = FMA(T3z, T3G, T3D * T3C);
Chris@82 1583 }
Chris@82 1584 }
Chris@82 1585 { /* outputs 17, 9, 1 and 25 */
Chris@82 1586 E T60, T6m, T6f, T6n, T67, T6r, T6c, T6q;
Chris@82 1587 {
Chris@82 1588 E T5Y, T5Z, T6d, T6e;
Chris@82 1589 T5Y = T4o + T4v;
Chris@82 1590 T5Z = T5z + T5A;
Chris@82 1591 T60 = T5Y + T5Z;
Chris@82 1592 T6m = T5Y - T5Z;
Chris@82 1593 T6d = FMA(KP195090322, T61, KP980785280 * T62);
Chris@82 1594 T6e = FNMS(KP195090322, T64, KP980785280 * T65);
Chris@82 1595 T6f = T6d + T6e;
Chris@82 1596 T6n = T6e - T6d;
Chris@82 1597 }
Chris@82 1598 {
Chris@82 1599 E T63, T66, T6a, T6b;
Chris@82 1600 T63 = FNMS(KP195090322, T62, KP980785280 * T61);
Chris@82 1601 T66 = FMA(KP980785280, T64, KP195090322 * T65);
Chris@82 1602 T67 = T63 + T66;
Chris@82 1603 T6r = T63 - T66;
Chris@82 1604 T6a = T5u + T5x;
Chris@82 1605 T6b = T4K + T4D;
Chris@82 1606 T6c = T6a + T6b;
Chris@82 1607 T6q = T6a - T6b;
Chris@82 1608 }
Chris@82 1609 {
Chris@82 1610 E T68, T6g, T5X, T69;
Chris@82 1611 T68 = T60 - T67;
Chris@82 1612 T6g = T6c - T6f;
Chris@82 1613 T5X = W[32];
Chris@82 1614 T69 = W[33];
Chris@82 1615 cr[WS(rs, 17)] = FNMS(T69, T6g, T5X * T68);
Chris@82 1616 ci[WS(rs, 17)] = FMA(T69, T68, T5X * T6g);
Chris@82 1617 }
Chris@82 1618 {
Chris@82 1619 E T6u, T6w, T6t, T6v;
Chris@82 1620 T6u = T6m + T6n;
Chris@82 1621 T6w = T6q + T6r;
Chris@82 1622 T6t = W[16];
Chris@82 1623 T6v = W[17];
Chris@82 1624 cr[WS(rs, 9)] = FNMS(T6v, T6w, T6t * T6u);
Chris@82 1625 ci[WS(rs, 9)] = FMA(T6t, T6w, T6v * T6u);
Chris@82 1626 }
Chris@82 1627 {
Chris@82 1628 E T6i, T6k, T6h, T6j;
Chris@82 1629 T6i = T60 + T67;
Chris@82 1630 T6k = T6c + T6f;
Chris@82 1631 T6h = W[0];
Chris@82 1632 T6j = W[1];
Chris@82 1633 cr[WS(rs, 1)] = FNMS(T6j, T6k, T6h * T6i);
Chris@82 1634 ci[WS(rs, 1)] = FMA(T6j, T6i, T6h * T6k);
Chris@82 1635 }
Chris@82 1636 {
Chris@82 1637 E T6o, T6s, T6l, T6p;
Chris@82 1638 T6o = T6m - T6n;
Chris@82 1639 T6s = T6q - T6r;
Chris@82 1640 T6l = W[48];
Chris@82 1641 T6p = W[49];
Chris@82 1642 cr[WS(rs, 25)] = FNMS(T6p, T6s, T6l * T6o);
Chris@82 1643 ci[WS(rs, 25)] = FMA(T6l, T6s, T6p * T6o);
Chris@82 1644 }
Chris@82 1645 }
Chris@82 1646 { /* outputs 23, 31, 7 and 15 */
Chris@82 1647 E T7u, T7Q, T7J, T7R, T7B, T7V, T7G, T7U;
Chris@82 1648 {
Chris@82 1649 E T7s, T7t, T7H, T7I;
Chris@82 1650 T7s = T6y + T6z;
Chris@82 1651 T7t = T73 + T74;
Chris@82 1652 T7u = T7s - T7t;
Chris@82 1653 T7Q = T7s + T7t;
Chris@82 1654 T7H = FMA(KP195090322, T7w, KP980785280 * T7v);
Chris@82 1655 T7I = FMA(KP195090322, T7z, KP980785280 * T7y);
Chris@82 1656 T7J = T7H - T7I;
Chris@82 1657 T7R = T7H + T7I;
Chris@82 1658 }
Chris@82 1659 {
Chris@82 1660 E T7x, T7A, T7E, T7F;
Chris@82 1661 T7x = FNMS(KP980785280, T7w, KP195090322 * T7v);
Chris@82 1662 T7A = FNMS(KP980785280, T7z, KP195090322 * T7y);
Chris@82 1663 T7B = T7x + T7A;
Chris@82 1664 T7V = T7x - T7A;
Chris@82 1665 T7E = T70 - T71;
Chris@82 1666 T7F = T6D - T6G;
Chris@82 1667 T7G = T7E + T7F;
Chris@82 1668 T7U = T7E - T7F;
Chris@82 1669 }
Chris@82 1670 {
Chris@82 1671 E T7C, T7K, T7r, T7D;
Chris@82 1672 T7C = T7u - T7B;
Chris@82 1673 T7K = T7G - T7J;
Chris@82 1674 T7r = W[44];
Chris@82 1675 T7D = W[45];
Chris@82 1676 cr[WS(rs, 23)] = FNMS(T7D, T7K, T7r * T7C);
Chris@82 1677 ci[WS(rs, 23)] = FMA(T7D, T7C, T7r * T7K);
Chris@82 1678 }
Chris@82 1679 {
Chris@82 1680 E T7Y, T80, T7X, T7Z;
Chris@82 1681 T7Y = T7Q + T7R;
Chris@82 1682 T80 = T7U - T7V;
Chris@82 1683 T7X = W[60];
Chris@82 1684 T7Z = W[61];
Chris@82 1685 cr[WS(rs, 31)] = FNMS(T7Z, T80, T7X * T7Y);
Chris@82 1686 ci[WS(rs, 31)] = FMA(T7X, T80, T7Z * T7Y);
Chris@82 1687 }
Chris@82 1688 {
Chris@82 1689 E T7M, T7O, T7L, T7N;
Chris@82 1690 T7M = T7u + T7B;
Chris@82 1691 T7O = T7G + T7J;
Chris@82 1692 T7L = W[12];
Chris@82 1693 T7N = W[13];
Chris@82 1694 cr[WS(rs, 7)] = FNMS(T7N, T7O, T7L * T7M);
Chris@82 1695 ci[WS(rs, 7)] = FMA(T7N, T7M, T7L * T7O);
Chris@82 1696 }
Chris@82 1697 {
Chris@82 1698 E T7S, T7W, T7P, T7T;
Chris@82 1699 T7S = T7Q - T7R;
Chris@82 1700 T7W = T7U + T7V;
Chris@82 1701 T7P = W[28];
Chris@82 1702 T7T = W[29];
Chris@82 1703 cr[WS(rs, 15)] = FNMS(T7T, T7W, T7P * T7S);
Chris@82 1704 ci[WS(rs, 15)] = FMA(T7P, T7W, T7T * T7S);
Chris@82 1705 }
Chris@82 1706 }
Chris@82 1707 { /* outputs 21, 13, 5 and 29 */
Chris@82 1708 E T4M, T5M, T5F, T5N, T5p, T5R, T5C, T5Q;
Chris@82 1709 {
Chris@82 1710 E T4w, T4L, T5D, T5E;
Chris@82 1711 T4w = T4o - T4v;
Chris@82 1712 T4L = T4D - T4K;
Chris@82 1713 T4M = T4w + T4L;
Chris@82 1714 T5M = T4w - T4L;
Chris@82 1715 T5D = FMA(KP831469612, T4X, KP555570233 * T54);
Chris@82 1716 T5E = FNMS(KP831469612, T5g, KP555570233 * T5n);
Chris@82 1717 T5F = T5D + T5E;
Chris@82 1718 T5N = T5E - T5D;
Chris@82 1719 }
Chris@82 1720 {
Chris@82 1721 E T55, T5o, T5y, T5B;
Chris@82 1722 T55 = FNMS(KP831469612, T54, KP555570233 * T4X);
Chris@82 1723 T5o = FMA(KP555570233, T5g, KP831469612 * T5n);
Chris@82 1724 T5p = T55 + T5o;
Chris@82 1725 T5R = T55 - T5o;
Chris@82 1726 T5y = T5u - T5x;
Chris@82 1727 T5B = T5z - T5A;
Chris@82 1728 T5C = T5y + T5B;
Chris@82 1729 T5Q = T5y - T5B;
Chris@82 1730 }
Chris@82 1731 {
Chris@82 1732 E T5q, T5G, T4l, T5r;
Chris@82 1733 T5q = T4M - T5p;
Chris@82 1734 T5G = T5C - T5F;
Chris@82 1735 T4l = W[40];
Chris@82 1736 T5r = W[41];
Chris@82 1737 cr[WS(rs, 21)] = FNMS(T5r, T5G, T4l * T5q);
Chris@82 1738 ci[WS(rs, 21)] = FMA(T5r, T5q, T4l * T5G);
Chris@82 1739 }
Chris@82 1740 {
Chris@82 1741 E T5U, T5W, T5T, T5V;
Chris@82 1742 T5U = T5M + T5N;
Chris@82 1743 T5W = T5Q + T5R;
Chris@82 1744 T5T = W[24];
Chris@82 1745 T5V = W[25];
Chris@82 1746 cr[WS(rs, 13)] = FNMS(T5V, T5W, T5T * T5U);
Chris@82 1747 ci[WS(rs, 13)] = FMA(T5T, T5W, T5V * T5U);
Chris@82 1748 }
Chris@82 1749 {
Chris@82 1750 E T5I, T5K, T5H, T5J;
Chris@82 1751 T5I = T4M + T5p;
Chris@82 1752 T5K = T5C + T5F;
Chris@82 1753 T5H = W[8];
Chris@82 1754 T5J = W[9];
Chris@82 1755 cr[WS(rs, 5)] = FNMS(T5J, T5K, T5H * T5I);
Chris@82 1756 ci[WS(rs, 5)] = FMA(T5J, T5I, T5H * T5K);
Chris@82 1757 }
Chris@82 1758 {
Chris@82 1759 E T5O, T5S, T5L, T5P;
Chris@82 1760 T5O = T5M - T5N;
Chris@82 1761 T5S = T5Q - T5R;
Chris@82 1762 T5L = W[56];
Chris@82 1763 T5P = W[57];
Chris@82 1764 cr[WS(rs, 29)] = FNMS(T5P, T5S, T5L * T5O);
Chris@82 1765 ci[WS(rs, 29)] = FMA(T5L, T5S, T5P * T5O);
Chris@82 1766 }
Chris@82 1767 }
Chris@82 1768 { /* outputs 27, 3, 11 and 19 */
Chris@82 1769 E T6I, T7g, T79, T7h, T6X, T7l, T76, T7k;
Chris@82 1770 {
Chris@82 1771 E T6A, T6H, T77, T78;
Chris@82 1772 T6A = T6y - T6z;
Chris@82 1773 T6H = T6D + T6G;
Chris@82 1774 T6I = T6A - T6H;
Chris@82 1775 T7g = T6A + T6H;
Chris@82 1776 T77 = FNMS(KP555570233, T6S, KP831469612 * T6V);
Chris@82 1777 T78 = FMA(KP555570233, T6L, KP831469612 * T6O);
Chris@82 1778 T79 = T77 - T78;
Chris@82 1779 T7h = T77 + T78;
Chris@82 1780 }
Chris@82 1781 {
Chris@82 1782 E T6P, T6W, T72, T75;
Chris@82 1783 T6P = FNMS(KP555570233, T6O, KP831469612 * T6L);
Chris@82 1784 T6W = FMA(KP831469612, T6S, KP555570233 * T6V);
Chris@82 1785 T6X = T6P - T6W;
Chris@82 1786 T7l = T6W + T6P;
Chris@82 1787 T72 = T70 + T71;
Chris@82 1788 T75 = T73 - T74;
Chris@82 1789 T76 = T72 - T75;
Chris@82 1790 T7k = T72 + T75;
Chris@82 1791 }
Chris@82 1792 {
Chris@82 1793 E T6Y, T7a, T6x, T6Z;
Chris@82 1794 T6Y = T6I - T6X;
Chris@82 1795 T7a = T76 - T79;
Chris@82 1796 T6x = W[52];
Chris@82 1797 T6Z = W[53];
Chris@82 1798 cr[WS(rs, 27)] = FNMS(T6Z, T7a, T6x * T6Y);
Chris@82 1799 ci[WS(rs, 27)] = FMA(T6Z, T6Y, T6x * T7a);
Chris@82 1800 }
Chris@82 1801 {
Chris@82 1802 E T7o, T7q, T7n, T7p;
Chris@82 1803 T7o = T7g + T7h;
Chris@82 1804 T7q = T7k + T7l;
Chris@82 1805 T7n = W[4];
Chris@82 1806 T7p = W[5];
Chris@82 1807 cr[WS(rs, 3)] = FNMS(T7p, T7q, T7n * T7o);
Chris@82 1808 ci[WS(rs, 3)] = FMA(T7n, T7q, T7p * T7o);
Chris@82 1809 }
Chris@82 1810 {
Chris@82 1811 E T7c, T7e, T7b, T7d;
Chris@82 1812 T7c = T6I + T6X;
Chris@82 1813 T7e = T76 + T79;
Chris@82 1814 T7b = W[20];
Chris@82 1815 T7d = W[21];
Chris@82 1816 cr[WS(rs, 11)] = FNMS(T7d, T7e, T7b * T7c);
Chris@82 1817 ci[WS(rs, 11)] = FMA(T7d, T7c, T7b * T7e);
Chris@82 1818 }
Chris@82 1819 {
Chris@82 1820 E T7i, T7m, T7f, T7j;
Chris@82 1821 T7i = T7g - T7h;
Chris@82 1822 T7m = T7k - T7l;
Chris@82 1823 T7f = W[36];
Chris@82 1824 T7j = W[37];
Chris@82 1825 cr[WS(rs, 19)] = FNMS(T7j, T7m, T7f * T7i);
Chris@82 1826 ci[WS(rs, 19)] = FMA(T7f, T7m, T7j * T7i);
Chris@82 1827 }
Chris@82 1828 }
Chris@82 1829 }
Chris@82 1830 }
Chris@82 1831 }
Chris@82 1832
Chris@82 1833 static const tw_instr twinstr[] = { /* multiplier-table layout handed to the planner through desc below */
Chris@82 1834 {TW_FULL, 1, 32}, /* presumably: full twiddle set for n = 32, i.e. the 31 pairs (62 reals) hb_32 reads from W -- confirm against the tw_instr definition */
Chris@82 1835 {TW_NEXT, 1, 0} /* presumably the end-of-list / advance marker -- confirm */
Chris@82 1836 };
Chris@82 1837
Chris@82 1838 static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {340, 114, 94, 0} }; /* 32 matches the generator's -n 32; {340, 114, 94, ...} are the add / mul / fused multiply-add counts quoted in this variant's generator comment; meaning of the trailing 0 is not visible here */
Chris@82 1839
Chris@82 1840 void X(codelet_hb_32) (planner *p) { /* entry point: hands the non-FMA hb_32 and its descriptor to planner p */
Chris@82 1841 X(khc2hc_register) (p, hb_32, &desc); /* X(...) is a project macro wrapping the identifier (presumably a name prefix -- confirm) */
Chris@82 1842 }
Chris@82 1843 #endif