annotate src/fftw-3.3.5/rdft/scalar/r2cb/hb_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:48 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@42 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@42 33 * 135 stack variables, 7 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hb.h"
Chris@42 36
/*
 * hb_32: size-32 hc2hc codelet that works in place on the cr/ci arrays
 * (generator command line above: gen_hc2hc, -n 32, -dif, -sign 1).
 *
 * Parameters, as used by the code below:
 *   cr, ci - arrays that are read and then overwritten; each iteration
 *            touches cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..31.
 *            cr moves forward by ms and ci moves backward by ms after
 *            every iteration.
 *   W      - twiddle table; 62 values are consumed per iteration and the
 *            pointer is pre-offset by (mb - 1) * 62 before the first one.
 *   rs     - stride handed to WS() to address the 32 elements.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - per-iteration step applied to cr (+ms) and ci (-ms).
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are defined outside this
 * file; presumably a * b + c and c - a * b respectively -- confirm against
 * the included headers before relying on the exact arithmetic form.
 */
Chris@42 37 static void hb_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Trigonometric constants; numerically, in order: cos(pi/16), tan(pi/16),
 * cos(3pi/16), tan(3pi/16), cos(pi/8), tan(pi/8), cos(pi/4). */
Chris@42 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 46 {
Chris@42 47 INT m;
/* One pass per m in [mb, me): cr advances by ms, ci retreats by ms, and W
 * advances by 62 twiddle values. */
Chris@42 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
/* Declared at loop scope because these six temporaries are consumed by the
 * last three statements of the loop body, after all nested blocks close. */
Chris@42 49 E T5o, T5r, T5q, T5n, T5s, T5p;
Chris@42 50 {
Chris@42 51 E T5K, Tf, T8k, T7k, T8x, T7N, T3i, T1i, T3v, T2L, T5f, T4v, T6T, T6m, T52;
Chris@42 52 E T42, TZ, T6X, T1X, T3p, T8p, T8B, T3o, T26, T58, T4n, T7T, T7z, T59, T4k;
Chris@42 53 E T6p, T6a, TK, T6W, T2o, T3m, T8s, T8A, T3l, T2x, T55, T4g, T7S, T7G, T56;
Chris@42 54 E T4d, T6o, T61, T5Q, T5N, T6f, Tu, T8y, T7r, T8l, T7Q, T3w, T1F, T45, T48;
Chris@42 55 E T3j, T2O, T53, T4y;
Chris@42 56 {
Chris@42 57 E T62, T69, T4j, T4i;
Chris@42 58 {
Chris@42 59 E T6l, T6i, T40, T41;
Chris@42 60 {
Chris@42 61 E T12, T3, T2D, T6, T6g, T2G, T6h, T15, Td, T6k, T1g, T2J, Ta, T17, T1a;
Chris@42 62 E T6j;
Chris@42 63 {
Chris@42 64 E T2E, T2F, T13, T14;
Chris@42 65 {
Chris@42 66 E T1, T2, T4, T5;
/* Input phase: all 32 cr and 32 ci elements are loaded in this first part
 * of the loop body, interleaved with sums and differences of the loaded
 * values. */
Chris@42 67 T1 = cr[0];
Chris@42 68 T2 = ci[WS(rs, 15)];
Chris@42 69 T4 = cr[WS(rs, 8)];
Chris@42 70 T5 = ci[WS(rs, 7)];
Chris@42 71 T2E = ci[WS(rs, 31)];
Chris@42 72 T12 = T1 - T2;
Chris@42 73 T3 = T1 + T2;
Chris@42 74 T2D = T4 - T5;
Chris@42 75 T6 = T4 + T5;
Chris@42 76 T2F = cr[WS(rs, 16)];
Chris@42 77 }
Chris@42 78 T13 = ci[WS(rs, 23)];
Chris@42 79 T14 = cr[WS(rs, 24)];
Chris@42 80 {
Chris@42 81 E Tb, Tc, T1d, T1e;
Chris@42 82 Tb = ci[WS(rs, 3)];
Chris@42 83 T6g = T2E - T2F;
Chris@42 84 T2G = T2E + T2F;
Chris@42 85 T6h = T13 - T14;
Chris@42 86 T15 = T13 + T14;
Chris@42 87 Tc = cr[WS(rs, 12)];
Chris@42 88 T1d = ci[WS(rs, 19)];
Chris@42 89 T1e = cr[WS(rs, 28)];
Chris@42 90 {
Chris@42 91 E T8, T1c, T1f, T9, T18, T19;
Chris@42 92 T8 = cr[WS(rs, 4)];
Chris@42 93 Td = Tb + Tc;
Chris@42 94 T1c = Tb - Tc;
Chris@42 95 T6k = T1d - T1e;
Chris@42 96 T1f = T1d + T1e;
Chris@42 97 T9 = ci[WS(rs, 11)];
Chris@42 98 T18 = ci[WS(rs, 27)];
Chris@42 99 T19 = cr[WS(rs, 20)];
Chris@42 100 T1g = T1c - T1f;
Chris@42 101 T2J = T1c + T1f;
Chris@42 102 Ta = T8 + T9;
Chris@42 103 T17 = T8 - T9;
Chris@42 104 T1a = T18 + T19;
Chris@42 105 T6j = T18 - T19;
Chris@42 106 }
Chris@42 107 }
Chris@42 108 }
Chris@42 109 {
Chris@42 110 E T2I, T7M, T7L, T16, T1h, T4u, T4t, T2H, T2K;
Chris@42 111 {
Chris@42 112 E T7i, T7, T1b, Te, T7j;
Chris@42 113 T7i = T3 - T6;
Chris@42 114 T7 = T3 + T6;
Chris@42 115 T2I = T17 + T1a;
Chris@42 116 T1b = T17 - T1a;
Chris@42 117 Te = Ta + Td;
Chris@42 118 T7M = Ta - Td;
Chris@42 119 T7j = T6k - T6j;
Chris@42 120 T6l = T6j + T6k;
Chris@42 121 T6i = T6g + T6h;
Chris@42 122 T7L = T6g - T6h;
Chris@42 123 T5K = T7 - Te;
Chris@42 124 Tf = T7 + Te;
Chris@42 125 T8k = T7i + T7j;
Chris@42 126 T7k = T7i - T7j;
Chris@42 127 T40 = T12 + T15;
Chris@42 128 T16 = T12 - T15;
Chris@42 129 T1h = T1b + T1g;
Chris@42 130 T4u = T1b - T1g;
Chris@42 131 }
Chris@42 132 T4t = T2G - T2D;
Chris@42 133 T2H = T2D + T2G;
Chris@42 134 T8x = T7M + T7L;
Chris@42 135 T7N = T7L - T7M;
Chris@42 136 T3i = FMA(KP707106781, T1h, T16);
Chris@42 137 T1i = FNMS(KP707106781, T1h, T16);
Chris@42 138 T2K = T2I - T2J;
Chris@42 139 T41 = T2I + T2J;
Chris@42 140 T3v = FMA(KP707106781, T2K, T2H);
Chris@42 141 T2L = FNMS(KP707106781, T2K, T2H);
Chris@42 142 T5f = FNMS(KP707106781, T4u, T4t);
Chris@42 143 T4v = FMA(KP707106781, T4u, T4t);
Chris@42 144 }
Chris@42 145 }
Chris@42 146 {
Chris@42 147 E T1Y, T1H, TR, T7w, T1K, T21, T65, T7t, TV, T1M, TU, T67, T1U, TW, T1N;
Chris@42 148 E T1O;
Chris@42 149 {
Chris@42 150 E TL, TM, TO, TP, T63, T64;
Chris@42 151 TL = ci[0];
Chris@42 152 T6T = T6i + T6l;
Chris@42 153 T6m = T6i - T6l;
Chris@42 154 T52 = FMA(KP707106781, T41, T40);
Chris@42 155 T42 = FNMS(KP707106781, T41, T40);
Chris@42 156 TM = cr[WS(rs, 15)];
Chris@42 157 TO = cr[WS(rs, 7)];
Chris@42 158 TP = ci[WS(rs, 8)];
Chris@42 159 {
Chris@42 160 E T1I, TN, TQ, T1J, T1Z, T20;
Chris@42 161 T1I = ci[WS(rs, 16)];
Chris@42 162 T1Y = TL - TM;
Chris@42 163 TN = TL + TM;
Chris@42 164 T1H = TO - TP;
Chris@42 165 TQ = TO + TP;
Chris@42 166 T1J = cr[WS(rs, 31)];
Chris@42 167 T1Z = ci[WS(rs, 24)];
Chris@42 168 T20 = cr[WS(rs, 23)];
Chris@42 169 TR = TN + TQ;
Chris@42 170 T7w = TN - TQ;
Chris@42 171 T1K = T1I + T1J;
Chris@42 172 T63 = T1I - T1J;
Chris@42 173 T64 = T1Z - T20;
Chris@42 174 T21 = T1Z + T20;
Chris@42 175 }
Chris@42 176 {
Chris@42 177 E TS, TT, T1S, T1T;
Chris@42 178 TS = cr[WS(rs, 3)];
Chris@42 179 T65 = T63 + T64;
Chris@42 180 T7t = T63 - T64;
Chris@42 181 TT = ci[WS(rs, 12)];
Chris@42 182 T1S = ci[WS(rs, 20)];
Chris@42 183 T1T = cr[WS(rs, 27)];
Chris@42 184 TV = ci[WS(rs, 4)];
Chris@42 185 T1M = TS - TT;
Chris@42 186 TU = TS + TT;
Chris@42 187 T67 = T1S - T1T;
Chris@42 188 T1U = T1S + T1T;
Chris@42 189 TW = cr[WS(rs, 11)];
Chris@42 190 T1N = ci[WS(rs, 28)];
Chris@42 191 T1O = cr[WS(rs, 19)];
Chris@42 192 }
Chris@42 193 }
Chris@42 194 {
Chris@42 195 E T4l, T1L, T24, T23, T8n, T7v, T1W, T8o, T7y, T4m, T22, T25;
Chris@42 196 {
Chris@42 197 E T1V, T7u, T7x, T1Q, T1R, TX;
Chris@42 198 T4l = T1H + T1K;
Chris@42 199 T1L = T1H - T1K;
Chris@42 200 T1R = TV - TW;
Chris@42 201 TX = TV + TW;
Chris@42 202 {
Chris@42 203 E T66, T1P, TY, T68;
Chris@42 204 T66 = T1N - T1O;
Chris@42 205 T1P = T1N + T1O;
Chris@42 206 T24 = T1R - T1U;
Chris@42 207 T1V = T1R + T1U;
Chris@42 208 T7u = TU - TX;
Chris@42 209 TY = TU + TX;
Chris@42 210 T68 = T66 + T67;
Chris@42 211 T7x = T67 - T66;
Chris@42 212 T23 = T1M - T1P;
Chris@42 213 T1Q = T1M + T1P;
Chris@42 214 TZ = TR + TY;
Chris@42 215 T62 = TR - TY;
Chris@42 216 T69 = T65 - T68;
Chris@42 217 T6X = T65 + T68;
Chris@42 218 }
Chris@42 219 T8n = T7u + T7t;
Chris@42 220 T7v = T7t - T7u;
Chris@42 221 T4j = T1Q + T1V;
Chris@42 222 T1W = T1Q - T1V;
Chris@42 223 T8o = T7w + T7x;
Chris@42 224 T7y = T7w - T7x;
Chris@42 225 }
Chris@42 226 T4i = T1Y + T21;
Chris@42 227 T22 = T1Y - T21;
Chris@42 228 T25 = T23 + T24;
Chris@42 229 T4m = T23 - T24;
Chris@42 230 T1X = FNMS(KP707106781, T1W, T1L);
Chris@42 231 T3p = FMA(KP707106781, T1W, T1L);
Chris@42 232 T8p = FNMS(KP414213562, T8o, T8n);
Chris@42 233 T8B = FMA(KP414213562, T8n, T8o);
Chris@42 234 T3o = FMA(KP707106781, T25, T22);
Chris@42 235 T26 = FNMS(KP707106781, T25, T22);
Chris@42 236 T58 = FMA(KP707106781, T4m, T4l);
Chris@42 237 T4n = FNMS(KP707106781, T4m, T4l);
Chris@42 238 T7T = FNMS(KP414213562, T7v, T7y);
Chris@42 239 T7z = FMA(KP414213562, T7y, T7v);
Chris@42 240 }
Chris@42 241 }
Chris@42 242 }
Chris@42 243 {
Chris@42 244 E T5T, T60, T4c, T4b;
Chris@42 245 {
Chris@42 246 E T2p, T28, TC, T7D, T2b, T2s, T5W, T7A, TG, T2d, TF, T5Y, T2l, TH, T2e;
Chris@42 247 E T2f;
Chris@42 248 {
Chris@42 249 E Tw, Tx, Tz, TA, T5U, T5V;
Chris@42 250 Tw = cr[WS(rs, 1)];
Chris@42 251 T59 = FMA(KP707106781, T4j, T4i);
Chris@42 252 T4k = FNMS(KP707106781, T4j, T4i);
Chris@42 253 T6p = T69 - T62;
Chris@42 254 T6a = T62 + T69;
Chris@42 255 Tx = ci[WS(rs, 14)];
Chris@42 256 Tz = cr[WS(rs, 9)];
Chris@42 257 TA = ci[WS(rs, 6)];
Chris@42 258 {
Chris@42 259 E T29, Ty, TB, T2a, T2q, T2r;
Chris@42 260 T29 = ci[WS(rs, 30)];
Chris@42 261 T2p = Tw - Tx;
Chris@42 262 Ty = Tw + Tx;
Chris@42 263 T28 = Tz - TA;
Chris@42 264 TB = Tz + TA;
Chris@42 265 T2a = cr[WS(rs, 17)];
Chris@42 266 T2q = ci[WS(rs, 22)];
Chris@42 267 T2r = cr[WS(rs, 25)];
Chris@42 268 TC = Ty + TB;
Chris@42 269 T7D = Ty - TB;
Chris@42 270 T2b = T29 + T2a;
Chris@42 271 T5U = T29 - T2a;
Chris@42 272 T5V = T2q - T2r;
Chris@42 273 T2s = T2q + T2r;
Chris@42 274 }
Chris@42 275 {
Chris@42 276 E TD, TE, T2j, T2k;
Chris@42 277 TD = cr[WS(rs, 5)];
Chris@42 278 T5W = T5U + T5V;
Chris@42 279 T7A = T5U - T5V;
Chris@42 280 TE = ci[WS(rs, 10)];
Chris@42 281 T2j = ci[WS(rs, 18)];
Chris@42 282 T2k = cr[WS(rs, 29)];
Chris@42 283 TG = ci[WS(rs, 2)];
Chris@42 284 T2d = TD - TE;
Chris@42 285 TF = TD + TE;
Chris@42 286 T5Y = T2j - T2k;
Chris@42 287 T2l = T2j + T2k;
Chris@42 288 TH = cr[WS(rs, 13)];
Chris@42 289 T2e = ci[WS(rs, 26)];
Chris@42 290 T2f = cr[WS(rs, 21)];
Chris@42 291 }
Chris@42 292 }
Chris@42 293 {
Chris@42 294 E T4e, T2c, T2v, T2u, T8q, T7C, T2n, T8r, T7F, T4f, T2t, T2w;
Chris@42 295 {
Chris@42 296 E T2m, T7B, T7E, T2h, T2i, TI;
Chris@42 297 T4e = T2b - T28;
Chris@42 298 T2c = T28 + T2b;
Chris@42 299 T2i = TG - TH;
Chris@42 300 TI = TG + TH;
Chris@42 301 {
Chris@42 302 E T5X, T2g, TJ, T5Z;
Chris@42 303 T5X = T2e - T2f;
Chris@42 304 T2g = T2e + T2f;
Chris@42 305 T2v = T2i - T2l;
Chris@42 306 T2m = T2i + T2l;
Chris@42 307 T7B = TF - TI;
Chris@42 308 TJ = TF + TI;
Chris@42 309 T5Z = T5X + T5Y;
Chris@42 310 T7E = T5Y - T5X;
Chris@42 311 T2u = T2d - T2g;
Chris@42 312 T2h = T2d + T2g;
Chris@42 313 TK = TC + TJ;
Chris@42 314 T5T = TC - TJ;
Chris@42 315 T60 = T5W - T5Z;
Chris@42 316 T6W = T5W + T5Z;
Chris@42 317 }
Chris@42 318 T8q = T7B + T7A;
Chris@42 319 T7C = T7A - T7B;
Chris@42 320 T4c = T2h + T2m;
Chris@42 321 T2n = T2h - T2m;
Chris@42 322 T8r = T7D + T7E;
Chris@42 323 T7F = T7D - T7E;
Chris@42 324 }
Chris@42 325 T4b = T2p + T2s;
Chris@42 326 T2t = T2p - T2s;
Chris@42 327 T2w = T2u + T2v;
Chris@42 328 T4f = T2v - T2u;
Chris@42 329 T2o = FNMS(KP707106781, T2n, T2c);
Chris@42 330 T3m = FMA(KP707106781, T2n, T2c);
Chris@42 331 T8s = FMA(KP414213562, T8r, T8q);
Chris@42 332 T8A = FNMS(KP414213562, T8q, T8r);
Chris@42 333 T3l = FMA(KP707106781, T2w, T2t);
Chris@42 334 T2x = FNMS(KP707106781, T2w, T2t);
Chris@42 335 T55 = FMA(KP707106781, T4f, T4e);
Chris@42 336 T4g = FNMS(KP707106781, T4f, T4e);
Chris@42 337 T7S = FMA(KP414213562, T7C, T7F);
Chris@42 338 T7G = FNMS(KP414213562, T7F, T7C);
Chris@42 339 }
Chris@42 340 }
Chris@42 341 {
Chris@42 342 E T44, T1D, Tm, T7o, T7p, T43, T1y, T47, T1s, Tt, T7m, T7l, T46, T1n;
Chris@42 343 {
Chris@42 344 E Tj, T1z, Ti, T5P, T1C, Tk, T1v, T1w;
Chris@42 345 {
Chris@42 346 E Tg, Th, T1A, T1B;
Chris@42 347 Tg = cr[WS(rs, 2)];
Chris@42 348 T56 = FMA(KP707106781, T4c, T4b);
Chris@42 349 T4d = FNMS(KP707106781, T4c, T4b);
Chris@42 350 T6o = T5T + T60;
Chris@42 351 T61 = T5T - T60;
Chris@42 352 Th = ci[WS(rs, 13)];
Chris@42 353 T1A = ci[WS(rs, 21)];
Chris@42 354 T1B = cr[WS(rs, 26)];
Chris@42 355 Tj = cr[WS(rs, 10)];
Chris@42 356 T1z = Tg - Th;
Chris@42 357 Ti = Tg + Th;
Chris@42 358 T5P = T1A - T1B;
Chris@42 359 T1C = T1A + T1B;
Chris@42 360 Tk = ci[WS(rs, 5)];
Chris@42 361 T1v = ci[WS(rs, 29)];
Chris@42 362 T1w = cr[WS(rs, 18)];
Chris@42 363 }
Chris@42 364 {
Chris@42 365 E T1u, Tl, T5O, T1x;
Chris@42 366 T44 = T1z + T1C;
Chris@42 367 T1D = T1z - T1C;
Chris@42 368 T1u = Tj - Tk;
Chris@42 369 Tl = Tj + Tk;
Chris@42 370 T5O = T1v - T1w;
Chris@42 371 T1x = T1v + T1w;
Chris@42 372 Tm = Ti + Tl;
Chris@42 373 T7o = Ti - Tl;
Chris@42 374 T7p = T5O - T5P;
Chris@42 375 T5Q = T5O + T5P;
Chris@42 376 T43 = T1x - T1u;
Chris@42 377 T1y = T1u + T1x;
Chris@42 378 }
Chris@42 379 }
Chris@42 380 {
Chris@42 381 E Tq, T1o, Tp, T5M, T1r, Tr, T1k, T1l;
Chris@42 382 {
Chris@42 383 E Tn, To, T1p, T1q;
Chris@42 384 Tn = ci[WS(rs, 1)];
Chris@42 385 To = cr[WS(rs, 14)];
Chris@42 386 T1p = ci[WS(rs, 25)];
Chris@42 387 T1q = cr[WS(rs, 22)];
Chris@42 388 Tq = cr[WS(rs, 6)];
Chris@42 389 T1o = Tn - To;
Chris@42 390 Tp = Tn + To;
Chris@42 391 T5M = T1p - T1q;
Chris@42 392 T1r = T1p + T1q;
Chris@42 393 Tr = ci[WS(rs, 9)];
Chris@42 394 T1k = ci[WS(rs, 17)];
Chris@42 395 T1l = cr[WS(rs, 30)];
Chris@42 396 }
Chris@42 397 {
Chris@42 398 E T1j, Ts, T5L, T1m;
Chris@42 399 T47 = T1o + T1r;
Chris@42 400 T1s = T1o - T1r;
Chris@42 401 T1j = Tq - Tr;
Chris@42 402 Ts = Tq + Tr;
Chris@42 403 T5L = T1k - T1l;
Chris@42 404 T1m = T1k + T1l;
Chris@42 405 Tt = Tp + Ts;
Chris@42 406 T7m = Tp - Ts;
Chris@42 407 T7l = T5L - T5M;
Chris@42 408 T5N = T5L + T5M;
Chris@42 409 T46 = T1j + T1m;
Chris@42 410 T1n = T1j - T1m;
Chris@42 411 }
Chris@42 412 }
Chris@42 413 {
Chris@42 414 E T7P, T7O, T2N, T1t, T1E, T2M, T7n, T7q, T4w, T4x;
Chris@42 415 T7P = T7m + T7l;
Chris@42 416 T7n = T7l - T7m;
Chris@42 417 T7q = T7o + T7p;
Chris@42 418 T7O = T7o - T7p;
Chris@42 419 T6f = Tm - Tt;
Chris@42 420 Tu = Tm + Tt;
Chris@42 421 T8y = T7q + T7n;
Chris@42 422 T7r = T7n - T7q;
Chris@42 423 T2N = FMA(KP414213562, T1n, T1s);
Chris@42 424 T1t = FNMS(KP414213562, T1s, T1n);
Chris@42 425 T1E = FMA(KP414213562, T1D, T1y);
Chris@42 426 T2M = FNMS(KP414213562, T1y, T1D);
Chris@42 427 T8l = T7O + T7P;
Chris@42 428 T7Q = T7O - T7P;
Chris@42 429 T3w = T1E + T1t;
Chris@42 430 T1F = T1t - T1E;
Chris@42 431 T45 = FNMS(KP414213562, T44, T43);
Chris@42 432 T4w = FMA(KP414213562, T43, T44);
Chris@42 433 T4x = FMA(KP414213562, T46, T47);
Chris@42 434 T48 = FNMS(KP414213562, T47, T46);
Chris@42 435 T3j = T2M + T2N;
Chris@42 436 T2O = T2M - T2N;
Chris@42 437 T53 = T4w + T4x;
Chris@42 438 T4y = T4w - T4x;
Chris@42 439 }
Chris@42 440 }
Chris@42 441 }
Chris@42 442 }
/* Output phase: no further inputs are loaded from here on. The remaining
 * butterflies are combined with the twiddle values from W and the results
 * are stored back into cr/ci. */
Chris@42 443 {
Chris@42 444 E T72, T5g, T49, T78, T77, T73, T7s, T7U, T7R, T7H, T3f, T3e, T3d;
Chris@42 445 {
Chris@42 446 E T5R, T8m, T8C, T8z, T8t, T8e, T86, T88, T8h, T8f, T8i, T8c, T8g;
Chris@42 447 {
Chris@42 448 E T6P, T6Q, T6Z, T6S, T6R;
Chris@42 449 {
Chris@42 450 E Tv, T10, T6V, T6Y, T6U;
Chris@42 451 T72 = Tf - Tu;
Chris@42 452 Tv = Tf + Tu;
Chris@42 453 T6U = T5Q + T5N;
Chris@42 454 T5R = T5N - T5Q;
Chris@42 455 T5g = T48 - T45;
Chris@42 456 T49 = T45 + T48;
Chris@42 457 T10 = TK + TZ;
Chris@42 458 T78 = TK - TZ;
Chris@42 459 T77 = T6T - T6U;
Chris@42 460 T6V = T6T + T6U;
Chris@42 461 T6Y = T6W + T6X;
Chris@42 462 T73 = T6X - T6W;
/* Element 0 (cr[0], ci[0]) is stored without any twiddle factor. Every other
 * element k = 1..31 is combined with the pair W[2k - 2], W[2k - 1]; for
 * example element 16 uses W[30] and W[31]. */
Chris@42 463 T6P = W[30];
Chris@42 464 cr[0] = Tv + T10;
Chris@42 465 T6Q = Tv - T10;
Chris@42 466 ci[0] = T6V + T6Y;
Chris@42 467 T6Z = T6V - T6Y;
Chris@42 468 T6S = W[31];
Chris@42 469 T6R = T6P * T6Q;
Chris@42 470 }
Chris@42 471 {
Chris@42 472 E T8O, T8W, T8Q, T8Z, T8X, T90, T8U, T8Y;
Chris@42 473 {
Chris@42 474 E T8R, T8S, T8M, T8N, T70;
Chris@42 475 T8M = FMA(KP707106781, T8l, T8k);
Chris@42 476 T8m = FNMS(KP707106781, T8l, T8k);
Chris@42 477 T8C = T8A - T8B;
Chris@42 478 T8N = T8A + T8B;
Chris@42 479 T70 = T6S * T6Q;
Chris@42 480 cr[WS(rs, 16)] = FNMS(T6S, T6Z, T6R);
Chris@42 481 T8R = FMA(KP707106781, T8y, T8x);
Chris@42 482 T8z = FNMS(KP707106781, T8y, T8x);
Chris@42 483 T8O = FNMS(KP923879532, T8N, T8M);
Chris@42 484 T8W = FMA(KP923879532, T8N, T8M);
Chris@42 485 ci[WS(rs, 16)] = FMA(T6P, T6Z, T70);
Chris@42 486 T8S = T8s + T8p;
Chris@42 487 T8t = T8p - T8s;
Chris@42 488 {
Chris@42 489 E T8L, T8T, T8P, T8V;
Chris@42 490 T8L = W[34];
Chris@42 491 T8Q = W[35];
Chris@42 492 T8V = W[2];
Chris@42 493 T8Z = FMA(KP923879532, T8S, T8R);
Chris@42 494 T8T = FNMS(KP923879532, T8S, T8R);
Chris@42 495 T8P = T8L * T8O;
Chris@42 496 T8X = T8V * T8W;
Chris@42 497 T90 = T8V * T8Z;
Chris@42 498 T8U = T8L * T8T;
Chris@42 499 cr[WS(rs, 18)] = FNMS(T8Q, T8T, T8P);
Chris@42 500 T8Y = W[3];
Chris@42 501 }
Chris@42 502 }
Chris@42 503 {
Chris@42 504 E T89, T8a, T84, T85;
Chris@42 505 T84 = FNMS(KP707106781, T7r, T7k);
Chris@42 506 T7s = FMA(KP707106781, T7r, T7k);
Chris@42 507 ci[WS(rs, 18)] = FMA(T8Q, T8O, T8U);
Chris@42 508 T85 = T7S + T7T;
Chris@42 509 T7U = T7S - T7T;
Chris@42 510 ci[WS(rs, 2)] = FMA(T8Y, T8W, T90);
Chris@42 511 cr[WS(rs, 2)] = FNMS(T8Y, T8Z, T8X);
Chris@42 512 T7R = FMA(KP707106781, T7Q, T7N);
Chris@42 513 T89 = FNMS(KP707106781, T7Q, T7N);
Chris@42 514 T8e = FMA(KP923879532, T85, T84);
Chris@42 515 T86 = FNMS(KP923879532, T85, T84);
Chris@42 516 T8a = T7G + T7z;
Chris@42 517 T7H = T7z - T7G;
Chris@42 518 {
Chris@42 519 E T83, T8b, T87, T8d;
Chris@42 520 T83 = W[26];
Chris@42 521 T88 = W[27];
Chris@42 522 T8d = W[58];
Chris@42 523 T8h = FMA(KP923879532, T8a, T89);
Chris@42 524 T8b = FNMS(KP923879532, T8a, T89);
Chris@42 525 T87 = T83 * T86;
Chris@42 526 T8f = T8d * T8e;
Chris@42 527 T8i = T8d * T8h;
Chris@42 528 T8c = T83 * T8b;
Chris@42 529 cr[WS(rs, 14)] = FNMS(T88, T8b, T87);
Chris@42 530 T8g = W[59];
Chris@42 531 }
Chris@42 532 }
Chris@42 533 }
Chris@42 534 }
Chris@42 535 {
Chris@42 536 E T5S, T6q, T6n, T6K, T6C, T6b, T6E, T6N, T6L, T6O, T6I, T6M;
Chris@42 537 {
Chris@42 538 E T6F, T6G, T6A, T6B;
Chris@42 539 T6A = T5K - T5R;
Chris@42 540 T5S = T5K + T5R;
Chris@42 541 ci[WS(rs, 14)] = FMA(T88, T86, T8c);
Chris@42 542 T6B = T6p - T6o;
Chris@42 543 T6q = T6o + T6p;
Chris@42 544 ci[WS(rs, 30)] = FMA(T8g, T8e, T8i);
Chris@42 545 cr[WS(rs, 30)] = FNMS(T8g, T8h, T8f);
Chris@42 546 T6n = T6f + T6m;
Chris@42 547 T6F = T6m - T6f;
Chris@42 548 T6K = FMA(KP707106781, T6B, T6A);
Chris@42 549 T6C = FNMS(KP707106781, T6B, T6A);
Chris@42 550 T6G = T61 - T6a;
Chris@42 551 T6b = T61 + T6a;
Chris@42 552 {
Chris@42 553 E T6z, T6H, T6D, T6J;
Chris@42 554 T6z = W[54];
Chris@42 555 T6E = W[55];
Chris@42 556 T6J = W[22];
Chris@42 557 T6N = FMA(KP707106781, T6G, T6F);
Chris@42 558 T6H = FNMS(KP707106781, T6G, T6F);
Chris@42 559 T6D = T6z * T6C;
Chris@42 560 T6L = T6J * T6K;
Chris@42 561 T6O = T6J * T6N;
Chris@42 562 T6I = T6z * T6H;
Chris@42 563 cr[WS(rs, 28)] = FNMS(T6E, T6H, T6D);
Chris@42 564 T6M = W[23];
Chris@42 565 }
Chris@42 566 }
Chris@42 567 {
Chris@42 568 E T8G, T8F, T8J, T8H, T8I, T8u;
Chris@42 569 ci[WS(rs, 28)] = FMA(T6E, T6C, T6I);
Chris@42 570 ci[WS(rs, 12)] = FMA(T6M, T6K, T6O);
Chris@42 571 cr[WS(rs, 12)] = FNMS(T6M, T6N, T6L);
Chris@42 572 T8G = FMA(KP923879532, T8t, T8m);
Chris@42 573 T8u = FNMS(KP923879532, T8t, T8m);
Chris@42 574 {
Chris@42 575 E T8j, T8w, T8D, T8v, T8E;
Chris@42 576 T8j = W[50];
Chris@42 577 T8w = W[51];
Chris@42 578 T8F = W[18];
Chris@42 579 T8J = FMA(KP923879532, T8C, T8z);
Chris@42 580 T8D = FNMS(KP923879532, T8C, T8z);
Chris@42 581 T8v = T8j * T8u;
Chris@42 582 T8E = T8w * T8u;
Chris@42 583 T8H = T8F * T8G;
Chris@42 584 T8I = W[19];
Chris@42 585 cr[WS(rs, 26)] = FNMS(T8w, T8D, T8v);
Chris@42 586 ci[WS(rs, 26)] = FMA(T8j, T8D, T8E);
Chris@42 587 }
Chris@42 588 {
Chris@42 589 E T6c, T6u, T6x, T6r, T8K, T5J, T6e;
Chris@42 590 cr[WS(rs, 10)] = FNMS(T8I, T8J, T8H);
Chris@42 591 T8K = T8I * T8G;
Chris@42 592 ci[WS(rs, 10)] = FMA(T8F, T8J, T8K);
Chris@42 593 T6c = FNMS(KP707106781, T6b, T5S);
Chris@42 594 T6u = FMA(KP707106781, T6b, T5S);
Chris@42 595 T6x = FMA(KP707106781, T6q, T6n);
Chris@42 596 T6r = FNMS(KP707106781, T6q, T6n);
Chris@42 597 T5J = W[38];
Chris@42 598 T6e = W[39];
Chris@42 599 {
Chris@42 600 E T6t, T6w, T6d, T6s, T6v, T6y;
Chris@42 601 T6t = W[6];
Chris@42 602 T6w = W[7];
Chris@42 603 T6d = T5J * T6c;
Chris@42 604 T6s = T6e * T6c;
Chris@42 605 T6v = T6t * T6u;
Chris@42 606 T6y = T6w * T6u;
Chris@42 607 cr[WS(rs, 20)] = FNMS(T6e, T6r, T6d);
Chris@42 608 ci[WS(rs, 20)] = FMA(T5J, T6r, T6s);
Chris@42 609 cr[WS(rs, 4)] = FNMS(T6w, T6x, T6v);
Chris@42 610 ci[WS(rs, 4)] = FMA(T6t, T6x, T6y);
Chris@42 611 }
Chris@42 612 }
Chris@42 613 }
Chris@42 614 }
Chris@42 615 }
Chris@42 616 {
Chris@42 617 E T7c, T7f, T7e, T7g, T7d;
Chris@42 618 {
Chris@42 619 E T71, T74, T79, T76, T75, T7b, T7a;
Chris@42 620 T71 = W[46];
Chris@42 621 T7c = T72 + T73;
Chris@42 622 T74 = T72 - T73;
Chris@42 623 T7f = T78 + T77;
Chris@42 624 T79 = T77 - T78;
Chris@42 625 T76 = W[47];
Chris@42 626 T75 = T71 * T74;
Chris@42 627 T7b = W[14];
Chris@42 628 T7a = T71 * T79;
Chris@42 629 T7e = W[15];
Chris@42 630 cr[WS(rs, 24)] = FNMS(T76, T79, T75);
Chris@42 631 T7g = T7b * T7f;
Chris@42 632 T7d = T7b * T7c;
Chris@42 633 ci[WS(rs, 24)] = FMA(T76, T74, T7a);
Chris@42 634 }
Chris@42 635 {
Chris@42 636 E T81, T7X, T80, T7Z, T82;
Chris@42 637 ci[WS(rs, 8)] = FMA(T7e, T7c, T7g);
Chris@42 638 cr[WS(rs, 8)] = FNMS(T7e, T7f, T7d);
Chris@42 639 {
Chris@42 640 E T7h, T7Y, T7I, T7V, T7K, T7J, T7W;
Chris@42 641 T7h = W[42];
Chris@42 642 T7Y = FMA(KP923879532, T7H, T7s);
Chris@42 643 T7I = FNMS(KP923879532, T7H, T7s);
Chris@42 644 T81 = FMA(KP923879532, T7U, T7R);
Chris@42 645 T7V = FNMS(KP923879532, T7U, T7R);
Chris@42 646 T7K = W[43];
Chris@42 647 T7J = T7h * T7I;
Chris@42 648 T7X = W[10];
Chris@42 649 T80 = W[11];
Chris@42 650 T7W = T7K * T7I;
Chris@42 651 cr[WS(rs, 22)] = FNMS(T7K, T7V, T7J);
Chris@42 652 T7Z = T7X * T7Y;
Chris@42 653 T82 = T80 * T7Y;
Chris@42 654 ci[WS(rs, 22)] = FMA(T7h, T7V, T7W);
Chris@42 655 }
Chris@42 656 {
Chris@42 657 E T2P, T37, T1G, T32, T2R, T2Q, T38, T2z, T27, T2y;
Chris@42 658 T2P = FMA(KP923879532, T2O, T2L);
Chris@42 659 T37 = FNMS(KP923879532, T2O, T2L);
Chris@42 660 cr[WS(rs, 6)] = FNMS(T80, T81, T7Z);
Chris@42 661 ci[WS(rs, 6)] = FMA(T7X, T81, T82);
Chris@42 662 T1G = FMA(KP923879532, T1F, T1i);
Chris@42 663 T32 = FNMS(KP923879532, T1F, T1i);
Chris@42 664 T2R = FNMS(KP668178637, T1X, T26);
Chris@42 665 T27 = FMA(KP668178637, T26, T1X);
Chris@42 666 T2y = FNMS(KP668178637, T2x, T2o);
Chris@42 667 T2Q = FMA(KP668178637, T2o, T2x);
Chris@42 668 T38 = T2y + T27;
Chris@42 669 T2z = T27 - T2y;
Chris@42 670 {
Chris@42 671 E T2C, T2A, T3c, T34, T2U, T39, T36, T31;
Chris@42 672 {
Chris@42 673 E T11, T2W, T2S, T33;
Chris@42 674 T11 = W[40];
Chris@42 675 T2C = W[41];
Chris@42 676 T2A = FNMS(KP831469612, T2z, T1G);
Chris@42 677 T2W = FMA(KP831469612, T2z, T1G);
Chris@42 678 T2S = T2Q - T2R;
Chris@42 679 T33 = T2Q + T2R;
Chris@42 680 {
Chris@42 681 E T2V, T2B, T2T, T2Z, T2X, T2Y, T30;
Chris@42 682 T2V = W[8];
Chris@42 683 T2B = T11 * T2A;
Chris@42 684 T3c = FMA(KP831469612, T33, T32);
Chris@42 685 T34 = FNMS(KP831469612, T33, T32);
Chris@42 686 T2T = FNMS(KP831469612, T2S, T2P);
Chris@42 687 T2Z = FMA(KP831469612, T2S, T2P);
Chris@42 688 T2X = T2V * T2W;
Chris@42 689 T2Y = W[9];
Chris@42 690 T30 = T2V * T2Z;
Chris@42 691 cr[WS(rs, 21)] = FNMS(T2C, T2T, T2B);
Chris@42 692 T2U = T11 * T2T;
Chris@42 693 cr[WS(rs, 5)] = FNMS(T2Y, T2Z, T2X);
Chris@42 694 ci[WS(rs, 5)] = FMA(T2Y, T2W, T30);
Chris@42 695 }
Chris@42 696 }
Chris@42 697 T39 = FNMS(KP831469612, T38, T37);
Chris@42 698 T3f = FMA(KP831469612, T38, T37);
Chris@42 699 ci[WS(rs, 21)] = FMA(T2C, T2A, T2U);
Chris@42 700 T36 = W[25];
Chris@42 701 T31 = W[24];
Chris@42 702 {
Chris@42 703 E T3b, T3g, T3a, T35;
Chris@42 704 T3e = W[57];
Chris@42 705 T3a = T36 * T34;
Chris@42 706 T35 = T31 * T34;
Chris@42 707 T3b = W[56];
Chris@42 708 T3g = T3e * T3c;
Chris@42 709 ci[WS(rs, 13)] = FMA(T31, T39, T3a);
Chris@42 710 cr[WS(rs, 13)] = FNMS(T36, T39, T35);
Chris@42 711 T3d = T3b * T3c;
Chris@42 712 ci[WS(rs, 29)] = FMA(T3b, T3f, T3g);
Chris@42 713 }
Chris@42 714 }
Chris@42 715 }
Chris@42 716 }
Chris@42 717 }
Chris@42 718 {
Chris@42 719 E T4G, T4J, T4I, T4F, T4K;
Chris@42 720 {
Chris@42 721 E T4z, T4R, T4a, T4M, T4h, T4o, T4C, T4N, T4A, T4B;
Chris@42 722 T4z = FMA(KP923879532, T4y, T4v);
Chris@42 723 T4R = FNMS(KP923879532, T4y, T4v);
Chris@42 724 T4a = FNMS(KP923879532, T49, T42);
Chris@42 725 T4M = FMA(KP923879532, T49, T42);
Chris@42 726 cr[WS(rs, 29)] = FNMS(T3e, T3f, T3d);
Chris@42 727 T4h = FNMS(KP668178637, T4g, T4d);
Chris@42 728 T4A = FMA(KP668178637, T4d, T4g);
Chris@42 729 T4B = FMA(KP668178637, T4k, T4n);
Chris@42 730 T4o = FNMS(KP668178637, T4n, T4k);
Chris@42 731 T4C = T4A - T4B;
Chris@42 732 T4N = T4A + T4B;
Chris@42 733 {
Chris@42 734 E T4W, T4Z, T4q, T4X, T50, T4Y;
Chris@42 735 {
Chris@42 736 E T4L, T4Q, T4O, T4p, T4S, T4P, T4U, T4V, T4T;
Chris@42 737 T4L = W[20];
Chris@42 738 T4Q = W[21];
Chris@42 739 T4W = FMA(KP831469612, T4N, T4M);
Chris@42 740 T4O = FNMS(KP831469612, T4N, T4M);
Chris@42 741 T4p = T4h + T4o;
Chris@42 742 T4S = T4h - T4o;
Chris@42 743 T4P = T4L * T4O;
Chris@42 744 T4V = W[52];
Chris@42 745 T4Z = FNMS(KP831469612, T4S, T4R);
Chris@42 746 T4T = FMA(KP831469612, T4S, T4R);
Chris@42 747 T4q = FNMS(KP831469612, T4p, T4a);
Chris@42 748 T4G = FMA(KP831469612, T4p, T4a);
Chris@42 749 cr[WS(rs, 11)] = FNMS(T4Q, T4T, T4P);
Chris@42 750 T4U = T4L * T4T;
Chris@42 751 T4X = T4V * T4W;
Chris@42 752 T50 = T4V * T4Z;
Chris@42 753 T4Y = W[53];
Chris@42 754 ci[WS(rs, 11)] = FMA(T4Q, T4O, T4U);
Chris@42 755 }
Chris@42 756 {
Chris@42 757 E T4D, T4s, T3Z, T4E, T4r;
Chris@42 758 T4J = FMA(KP831469612, T4C, T4z);
Chris@42 759 T4D = FNMS(KP831469612, T4C, T4z);
Chris@42 760 T4s = W[37];
Chris@42 761 ci[WS(rs, 27)] = FMA(T4Y, T4W, T50);
Chris@42 762 cr[WS(rs, 27)] = FNMS(T4Y, T4Z, T4X);
Chris@42 763 T3Z = W[36];
Chris@42 764 T4E = T4s * T4q;
Chris@42 765 T4I = W[5];
Chris@42 766 T4r = T3Z * T4q;
Chris@42 767 ci[WS(rs, 19)] = FMA(T3Z, T4D, T4E);
Chris@42 768 T4F = W[4];
Chris@42 769 T4K = T4I * T4G;
Chris@42 770 cr[WS(rs, 19)] = FNMS(T4s, T4D, T4r);
Chris@42 771 }
Chris@42 772 }
Chris@42 773 }
Chris@42 774 {
Chris@42 775 E T3E, T3H, T3G, T3D, T3I;
Chris@42 776 {
Chris@42 777 E T3x, T3P, T3k, T3K, T3n, T3q, T3A, T3L, T4H, T3y, T3z;
Chris@42 778 T3x = FMA(KP923879532, T3w, T3v);
Chris@42 779 T3P = FNMS(KP923879532, T3w, T3v);
Chris@42 780 T4H = T4F * T4G;
Chris@42 781 ci[WS(rs, 3)] = FMA(T4F, T4J, T4K);
Chris@42 782 T3k = FMA(KP923879532, T3j, T3i);
Chris@42 783 T3K = FNMS(KP923879532, T3j, T3i);
Chris@42 784 T3y = FMA(KP198912367, T3l, T3m);
Chris@42 785 T3n = FNMS(KP198912367, T3m, T3l);
Chris@42 786 cr[WS(rs, 3)] = FNMS(T4I, T4J, T4H);
Chris@42 787 T3z = FNMS(KP198912367, T3o, T3p);
Chris@42 788 T3q = FMA(KP198912367, T3p, T3o);
Chris@42 789 T3A = T3y + T3z;
Chris@42 790 T3L = T3z - T3y;
Chris@42 791 {
Chris@42 792 E T3U, T3X, T3s, T3V, T3Y, T3W;
Chris@42 793 {
Chris@42 794 E T3J, T3O, T3M, T3r, T3Q, T3N, T3S, T3T, T3R;
Chris@42 795 T3J = W[48];
Chris@42 796 T3O = W[49];
Chris@42 797 T3U = FMA(KP980785280, T3L, T3K);
Chris@42 798 T3M = FNMS(KP980785280, T3L, T3K);
Chris@42 799 T3r = T3n + T3q;
Chris@42 800 T3Q = T3n - T3q;
Chris@42 801 T3N = T3J * T3M;
Chris@42 802 T3T = W[16];
Chris@42 803 T3X = FMA(KP980785280, T3Q, T3P);
Chris@42 804 T3R = FNMS(KP980785280, T3Q, T3P);
Chris@42 805 T3s = FNMS(KP980785280, T3r, T3k);
Chris@42 806 T3E = FMA(KP980785280, T3r, T3k);
Chris@42 807 cr[WS(rs, 25)] = FNMS(T3O, T3R, T3N);
Chris@42 808 T3S = T3J * T3R;
Chris@42 809 T3V = T3T * T3U;
Chris@42 810 T3Y = T3T * T3X;
Chris@42 811 T3W = W[17];
Chris@42 812 ci[WS(rs, 25)] = FMA(T3O, T3M, T3S);
Chris@42 813 }
Chris@42 814 {
Chris@42 815 E T3B, T3u, T3h, T3C, T3t;
Chris@42 816 T3H = FMA(KP980785280, T3A, T3x);
Chris@42 817 T3B = FNMS(KP980785280, T3A, T3x);
Chris@42 818 T3u = W[33];
Chris@42 819 ci[WS(rs, 9)] = FMA(T3W, T3U, T3Y);
Chris@42 820 cr[WS(rs, 9)] = FNMS(T3W, T3X, T3V);
Chris@42 821 T3h = W[32];
Chris@42 822 T3C = T3u * T3s;
Chris@42 823 T3G = W[1];
Chris@42 824 T3t = T3h * T3s;
Chris@42 825 ci[WS(rs, 17)] = FMA(T3h, T3B, T3C);
Chris@42 826 T3D = W[0];
Chris@42 827 T3I = T3G * T3E;
Chris@42 828 cr[WS(rs, 17)] = FNMS(T3u, T3B, T3t);
Chris@42 829 }
Chris@42 830 }
Chris@42 831 }
Chris@42 832 {
Chris@42 833 E T5h, T5z, T54, T5u, T57, T5a, T5k, T5v, T3F, T5i, T5j;
Chris@42 834 T5h = FMA(KP923879532, T5g, T5f);
Chris@42 835 T5z = FNMS(KP923879532, T5g, T5f);
Chris@42 836 T3F = T3D * T3E;
Chris@42 837 ci[WS(rs, 1)] = FMA(T3D, T3H, T3I);
Chris@42 838 T54 = FNMS(KP923879532, T53, T52);
Chris@42 839 T5u = FMA(KP923879532, T53, T52);
Chris@42 840 T5i = FMA(KP198912367, T55, T56);
Chris@42 841 T57 = FNMS(KP198912367, T56, T55);
Chris@42 842 cr[WS(rs, 1)] = FNMS(T3G, T3H, T3F);
Chris@42 843 T5j = FMA(KP198912367, T58, T59);
Chris@42 844 T5a = FNMS(KP198912367, T59, T58);
Chris@42 845 T5k = T5i - T5j;
Chris@42 846 T5v = T5i + T5j;
Chris@42 847 {
Chris@42 848 E T5E, T5H, T5c, T5F, T5I, T5G;
Chris@42 849 {
Chris@42 850 E T5t, T5y, T5w, T5b, T5A, T5x, T5C, T5D, T5B;
Chris@42 851 T5t = W[28];
Chris@42 852 T5y = W[29];
Chris@42 853 T5E = FMA(KP980785280, T5v, T5u);
Chris@42 854 T5w = FNMS(KP980785280, T5v, T5u);
Chris@42 855 T5b = T57 + T5a;
Chris@42 856 T5A = T5a - T57;
Chris@42 857 T5x = T5t * T5w;
Chris@42 858 T5D = W[60];
Chris@42 859 T5H = FNMS(KP980785280, T5A, T5z);
Chris@42 860 T5B = FMA(KP980785280, T5A, T5z);
Chris@42 861 T5c = FMA(KP980785280, T5b, T54);
Chris@42 862 T5o = FNMS(KP980785280, T5b, T54);
Chris@42 863 cr[WS(rs, 15)] = FNMS(T5y, T5B, T5x);
Chris@42 864 T5C = T5t * T5B;
Chris@42 865 T5F = T5D * T5E;
Chris@42 866 T5I = T5D * T5H;
Chris@42 867 T5G = W[61];
Chris@42 868 ci[WS(rs, 15)] = FMA(T5y, T5w, T5C);
Chris@42 869 }
Chris@42 870 {
Chris@42 871 E T5l, T5e, T51, T5m, T5d;
Chris@42 872 T5r = FMA(KP980785280, T5k, T5h);
Chris@42 873 T5l = FNMS(KP980785280, T5k, T5h);
Chris@42 874 T5e = W[45];
Chris@42 875 ci[WS(rs, 31)] = FMA(T5G, T5E, T5I);
Chris@42 876 cr[WS(rs, 31)] = FNMS(T5G, T5H, T5F);
Chris@42 877 T51 = W[44];
Chris@42 878 T5m = T5e * T5c;
Chris@42 879 T5q = W[13];
Chris@42 880 T5d = T51 * T5c;
Chris@42 881 ci[WS(rs, 23)] = FMA(T51, T5l, T5m);
Chris@42 882 T5n = W[12];
Chris@42 883 T5s = T5q * T5o;
Chris@42 884 cr[WS(rs, 23)] = FNMS(T5e, T5l, T5d);
Chris@42 885 }
Chris@42 886 }
Chris@42 887 }
Chris@42 888 }
Chris@42 889 }
Chris@42 890 }
Chris@42 891 }
/* Last output pair: element 7, using the twiddle values W[12] (T5n) and
 * W[13] (T5q) loaded inside the nested blocks above. */
Chris@42 892 T5p = T5n * T5o;
Chris@42 893 ci[WS(rs, 7)] = FMA(T5n, T5r, T5s);
Chris@42 894 cr[WS(rs, 7)] = FNMS(T5q, T5r, T5p);
Chris@42 895 }
Chris@42 896 }
Chris@42 897 }
Chris@42 898
/*
 * Twiddle instruction list referenced by the codelet descriptor: one TW_FULL
 * entry with arguments (1, 32) followed by one TW_NEXT entry with arguments
 * (1, 0).
 * NOTE(review): presumably TW_FULL asks for the complete twiddle set of a
 * size-32 transform (hb_32 consumes 62 W values per iteration) and TW_NEXT
 * terminates the list -- confirm against the tw_instr definition.
 */
Chris@42 899 static const tw_instr twinstr[] = {
Chris@42 900 {TW_FULL, 1, 32},
Chris@42 901 {TW_NEXT, 1, 0}
Chris@42 902 };
Chris@42 903
/*
 * Codelet descriptor: size 32, name "hb_32", the twinstr list, the GENUS
 * object, and the counts {236, 62, 198, 0}. The first three counts match the
 * additions / multiplications / fused multiply-add figures quoted in the
 * generated header comment of this HAVE_FMA variant.
 */
Chris@42 904 static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {236, 62, 198, 0} };
Chris@42 905
/*
 * Registration hook: hands hb_32 and its descriptor to X(khc2hc_register)
 * for the planner p.
 */
Chris@42 906 void X(codelet_hb_32) (planner *p) {
Chris@42 907 X(khc2hc_register) (p, hb_32, &desc);
Chris@42 908 }
Chris@42 909 #else /* HAVE_FMA */
Chris@42 910
Chris@42 911 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */
Chris@42 912
Chris@42 913 /*
Chris@42 914 * This function contains 434 FP additions, 208 FP multiplications,
Chris@42 915 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@42 916 * 98 stack variables, 7 constants, and 128 memory accesses
Chris@42 917 */
Chris@42 918 #include "hb.h"
Chris@42 919
/*
 * hb_32 (build without HAVE_FMA): generated size-32 codelet; the "Generated by"
 * line for this branch shows the generator flags "-n 32 -dif -sign 1".
 *
 * Parameters, as they are used in the body:
 *   cr, ci - arrays updated in place; element k is addressed as
 *            cr[WS(rs, k)] / ci[WS(rs, k)] for k = 0..31.
 *   W      - table of multipliers; entries W[0]..W[61] are read per iteration.
 *   rs     - stride handed to WS() to locate element k.
 *   mb, me - the loop runs for m = mb, mb+1, ..., me-1.
 *   ms     - per-iteration step: cr advances by +ms while ci moves by -ms.
 *
 * Within one iteration all 32 cr loads and all 32 ci loads happen before the
 * first store, so a store cannot clobber a value still to be read in that
 * iteration.
 *
 * NOTE(review): FMA(a,b,c), FNMS(a,b,c), WS(), DK(), E, R, INT and
 * MAKE_VOLATILE_STRIDE are defined outside this file. The comments below assume
 * FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm against the headers.
 */
Chris@42 920 static void hb_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 921 {
/* Numeric constants. By value these equal: sin(3*pi/16), cos(3*pi/16), cos(pi/16),
 * sin(pi/16), cos(pi/8), sin(pi/8) and cos(pi/4), in declaration order. */
Chris@42 922 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 923 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 924 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 925 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 926 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 927 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 928 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 929 {
Chris@42 930 INT m;
/* W is first offset by (mb - 1) * 62 and then advanced by 62 per iteration, so
 * iteration m uses the 62 entries starting at offset 62 * (m - 1) of the table the
 * caller passed. cr walks forward by ms and ci walks backward by ms.
 * NOTE(review): the effect of MAKE_VOLATILE_STRIDE(64, rs) is not visible here. */
Chris@42 931 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@42 932 E T4o, T6y, T70, T5u, Tf, T12, T5x, T6z, T3m, T3Y, T29, T2y, T4v, T71, T2U;
Chris@42 933 E T3M, Tu, T1U, T6D, T73, T6G, T74, T1h, T2z, T2X, T3o, T4D, T5A, T4K, T5z;
Chris@42 934 E T30, T3n, TK, T1j, T6S, T7w, T6V, T7v, T1y, T2B, T3c, T3S, T4X, T61, T54;
Chris@42 935 E T62, T3f, T3T, TZ, T1A, T6L, T7z, T6O, T7y, T1P, T2C, T35, T3P, T5g, T64;
Chris@42 936 E T5n, T65, T38, T3Q;
/* Load group 1: cr[0,4,8,...,28] and ci[3,7,11,...,31]; forms pairwise sums and
 * differences and the first combined terms (Tf, T12, T4o, T6y, ...). */
Chris@42 937 {
Chris@42 938 E T3, T4m, T24, T4q, T27, T4t, T6, T5s, Ta, T4p, T1X, T5t, T20, T4n, Td;
Chris@42 939 E T4s;
Chris@42 940 {
Chris@42 941 E T1, T2, T22, T23;
Chris@42 942 T1 = cr[0];
Chris@42 943 T2 = ci[WS(rs, 15)];
Chris@42 944 T3 = T1 + T2;
Chris@42 945 T4m = T1 - T2;
Chris@42 946 T22 = ci[WS(rs, 27)];
Chris@42 947 T23 = cr[WS(rs, 20)];
Chris@42 948 T24 = T22 - T23;
Chris@42 949 T4q = T22 + T23;
Chris@42 950 }
Chris@42 951 {
Chris@42 952 E T25, T26, T4, T5;
Chris@42 953 T25 = ci[WS(rs, 19)];
Chris@42 954 T26 = cr[WS(rs, 28)];
Chris@42 955 T27 = T25 - T26;
Chris@42 956 T4t = T25 + T26;
Chris@42 957 T4 = cr[WS(rs, 8)];
Chris@42 958 T5 = ci[WS(rs, 7)];
Chris@42 959 T6 = T4 + T5;
Chris@42 960 T5s = T4 - T5;
Chris@42 961 }
Chris@42 962 {
Chris@42 963 E T8, T9, T1V, T1W;
Chris@42 964 T8 = cr[WS(rs, 4)];
Chris@42 965 T9 = ci[WS(rs, 11)];
Chris@42 966 Ta = T8 + T9;
Chris@42 967 T4p = T8 - T9;
Chris@42 968 T1V = ci[WS(rs, 31)];
Chris@42 969 T1W = cr[WS(rs, 16)];
Chris@42 970 T1X = T1V - T1W;
Chris@42 971 T5t = T1V + T1W;
Chris@42 972 }
Chris@42 973 {
Chris@42 974 E T1Y, T1Z, Tb, Tc;
Chris@42 975 T1Y = ci[WS(rs, 23)];
Chris@42 976 T1Z = cr[WS(rs, 24)];
Chris@42 977 T20 = T1Y - T1Z;
Chris@42 978 T4n = T1Y + T1Z;
Chris@42 979 Tb = ci[WS(rs, 3)];
Chris@42 980 Tc = cr[WS(rs, 12)];
Chris@42 981 Td = Tb + Tc;
Chris@42 982 T4s = Tb - Tc;
Chris@42 983 }
Chris@42 984 {
Chris@42 985 E T7, Te, T21, T28;
Chris@42 986 T4o = T4m - T4n;
Chris@42 987 T6y = T4m + T4n;
Chris@42 988 T70 = T5t - T5s;
Chris@42 989 T5u = T5s + T5t;
Chris@42 990 T7 = T3 + T6;
Chris@42 991 Te = Ta + Td;
Chris@42 992 Tf = T7 + Te;
Chris@42 993 T12 = T7 - Te;
Chris@42 994 {
Chris@42 995 E T5v, T5w, T3k, T3l;
Chris@42 996 T5v = T4p + T4q;
Chris@42 997 T5w = T4s + T4t;
Chris@42 998 T5x = KP707106781 * (T5v - T5w);
Chris@42 999 T6z = KP707106781 * (T5v + T5w);
Chris@42 1000 T3k = T1X - T20;
Chris@42 1001 T3l = Ta - Td;
Chris@42 1002 T3m = T3k - T3l;
Chris@42 1003 T3Y = T3l + T3k;
Chris@42 1004 }
Chris@42 1005 T21 = T1X + T20;
Chris@42 1006 T28 = T24 + T27;
Chris@42 1007 T29 = T21 - T28;
Chris@42 1008 T2y = T21 + T28;
Chris@42 1009 {
Chris@42 1010 E T4r, T4u, T2S, T2T;
Chris@42 1011 T4r = T4p - T4q;
Chris@42 1012 T4u = T4s - T4t;
Chris@42 1013 T4v = KP707106781 * (T4r + T4u);
Chris@42 1014 T71 = KP707106781 * (T4r - T4u);
Chris@42 1015 T2S = T3 - T6;
Chris@42 1016 T2T = T27 - T24;
Chris@42 1017 T2U = T2S - T2T;
Chris@42 1018 T3M = T2S + T2T;
Chris@42 1019 }
Chris@42 1020 }
Chris@42 1021 }
/* Load group 2: cr[2,6,10,...,30] and ci[1,5,9,...,29]; several results are
 * rotated with the KP923879532 / KP382683432 constant pair. */
Chris@42 1022 {
Chris@42 1023 E Ti, T4H, T1c, T4F, T1f, T4I, Tl, T4E, Tp, T4A, T15, T4y, T18, T4B, Ts;
Chris@42 1024 E T4x;
Chris@42 1025 {
Chris@42 1026 E Tg, Th, T1a, T1b;
Chris@42 1027 Tg = cr[WS(rs, 2)];
Chris@42 1028 Th = ci[WS(rs, 13)];
Chris@42 1029 Ti = Tg + Th;
Chris@42 1030 T4H = Tg - Th;
Chris@42 1031 T1a = ci[WS(rs, 29)];
Chris@42 1032 T1b = cr[WS(rs, 18)];
Chris@42 1033 T1c = T1a - T1b;
Chris@42 1034 T4F = T1a + T1b;
Chris@42 1035 }
Chris@42 1036 {
Chris@42 1037 E T1d, T1e, Tj, Tk;
Chris@42 1038 T1d = ci[WS(rs, 21)];
Chris@42 1039 T1e = cr[WS(rs, 26)];
Chris@42 1040 T1f = T1d - T1e;
Chris@42 1041 T4I = T1d + T1e;
Chris@42 1042 Tj = cr[WS(rs, 10)];
Chris@42 1043 Tk = ci[WS(rs, 5)];
Chris@42 1044 Tl = Tj + Tk;
Chris@42 1045 T4E = Tj - Tk;
Chris@42 1046 }
Chris@42 1047 {
Chris@42 1048 E Tn, To, T13, T14;
Chris@42 1049 Tn = ci[WS(rs, 1)];
Chris@42 1050 To = cr[WS(rs, 14)];
Chris@42 1051 Tp = Tn + To;
Chris@42 1052 T4A = Tn - To;
Chris@42 1053 T13 = ci[WS(rs, 17)];
Chris@42 1054 T14 = cr[WS(rs, 30)];
Chris@42 1055 T15 = T13 - T14;
Chris@42 1056 T4y = T13 + T14;
Chris@42 1057 }
Chris@42 1058 {
Chris@42 1059 E T16, T17, Tq, Tr;
Chris@42 1060 T16 = ci[WS(rs, 25)];
Chris@42 1061 T17 = cr[WS(rs, 22)];
Chris@42 1062 T18 = T16 - T17;
Chris@42 1063 T4B = T16 + T17;
Chris@42 1064 Tq = cr[WS(rs, 6)];
Chris@42 1065 Tr = ci[WS(rs, 9)];
Chris@42 1066 Ts = Tq + Tr;
Chris@42 1067 T4x = Tq - Tr;
Chris@42 1068 }
Chris@42 1069 {
Chris@42 1070 E Tm, Tt, T6B, T6C;
Chris@42 1071 Tm = Ti + Tl;
Chris@42 1072 Tt = Tp + Ts;
Chris@42 1073 Tu = Tm + Tt;
Chris@42 1074 T1U = Tm - Tt;
Chris@42 1075 T6B = T4H + T4I;
Chris@42 1076 T6C = T4F - T4E;
Chris@42 1077 T6D = FNMS(KP923879532, T6C, KP382683432 * T6B);
Chris@42 1078 T73 = FMA(KP382683432, T6C, KP923879532 * T6B);
Chris@42 1079 }
Chris@42 1080 {
Chris@42 1081 E T6E, T6F, T19, T1g;
Chris@42 1082 T6E = T4A + T4B;
Chris@42 1083 T6F = T4x + T4y;
Chris@42 1084 T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);
Chris@42 1085 T74 = FMA(KP382683432, T6F, KP923879532 * T6E);
Chris@42 1086 T19 = T15 + T18;
Chris@42 1087 T1g = T1c + T1f;
Chris@42 1088 T1h = T19 - T1g;
Chris@42 1089 T2z = T1g + T19;
Chris@42 1090 }
Chris@42 1091 {
Chris@42 1092 E T2V, T2W, T4z, T4C;
Chris@42 1093 T2V = T15 - T18;
Chris@42 1094 T2W = Tp - Ts;
Chris@42 1095 T2X = T2V - T2W;
Chris@42 1096 T3o = T2W + T2V;
Chris@42 1097 T4z = T4x - T4y;
Chris@42 1098 T4C = T4A - T4B;
Chris@42 1099 T4D = FNMS(KP382683432, T4C, KP923879532 * T4z);
Chris@42 1100 T5A = FMA(KP382683432, T4z, KP923879532 * T4C);
Chris@42 1101 }
Chris@42 1102 {
Chris@42 1103 E T4G, T4J, T2Y, T2Z;
Chris@42 1104 T4G = T4E + T4F;
Chris@42 1105 T4J = T4H - T4I;
Chris@42 1106 T4K = FMA(KP923879532, T4G, KP382683432 * T4J);
Chris@42 1107 T5z = FNMS(KP382683432, T4G, KP923879532 * T4J);
Chris@42 1108 T2Y = Ti - Tl;
Chris@42 1109 T2Z = T1c - T1f;
Chris@42 1110 T30 = T2Y + T2Z;
Chris@42 1111 T3n = T2Y - T2Z;
Chris@42 1112 }
Chris@42 1113 }
/* Load group 3: cr[1,5,9,...,29] and ci[2,6,10,...,30]. */
Chris@42 1114 {
Chris@42 1115 E Ty, T4N, TB, T4Y, T1p, T4O, T1m, T4Z, TI, T52, T1w, T4V, TF, T51, T1t;
Chris@42 1116 E T4S;
Chris@42 1117 {
Chris@42 1118 E Tw, Tx, T1k, T1l;
Chris@42 1119 Tw = cr[WS(rs, 1)];
Chris@42 1120 Tx = ci[WS(rs, 14)];
Chris@42 1121 Ty = Tw + Tx;
Chris@42 1122 T4N = Tw - Tx;
Chris@42 1123 {
Chris@42 1124 E Tz, TA, T1n, T1o;
Chris@42 1125 Tz = cr[WS(rs, 9)];
Chris@42 1126 TA = ci[WS(rs, 6)];
Chris@42 1127 TB = Tz + TA;
Chris@42 1128 T4Y = Tz - TA;
Chris@42 1129 T1n = ci[WS(rs, 22)];
Chris@42 1130 T1o = cr[WS(rs, 25)];
Chris@42 1131 T1p = T1n - T1o;
Chris@42 1132 T4O = T1n + T1o;
Chris@42 1133 }
Chris@42 1134 T1k = ci[WS(rs, 30)];
Chris@42 1135 T1l = cr[WS(rs, 17)];
Chris@42 1136 T1m = T1k - T1l;
Chris@42 1137 T4Z = T1k + T1l;
Chris@42 1138 {
Chris@42 1139 E TG, TH, T4T, T1u, T1v, T4U;
Chris@42 1140 TG = ci[WS(rs, 2)];
Chris@42 1141 TH = cr[WS(rs, 13)];
Chris@42 1142 T4T = TG - TH;
Chris@42 1143 T1u = ci[WS(rs, 18)];
Chris@42 1144 T1v = cr[WS(rs, 29)];
Chris@42 1145 T4U = T1u + T1v;
Chris@42 1146 TI = TG + TH;
Chris@42 1147 T52 = T4T + T4U;
Chris@42 1148 T1w = T1u - T1v;
Chris@42 1149 T4V = T4T - T4U;
Chris@42 1150 }
Chris@42 1151 {
Chris@42 1152 E TD, TE, T4Q, T1r, T1s, T4R;
Chris@42 1153 TD = cr[WS(rs, 5)];
Chris@42 1154 TE = ci[WS(rs, 10)];
Chris@42 1155 T4Q = TD - TE;
Chris@42 1156 T1r = ci[WS(rs, 26)];
Chris@42 1157 T1s = cr[WS(rs, 21)];
Chris@42 1158 T4R = T1r + T1s;
Chris@42 1159 TF = TD + TE;
Chris@42 1160 T51 = T4Q + T4R;
Chris@42 1161 T1t = T1r - T1s;
Chris@42 1162 T4S = T4Q - T4R;
Chris@42 1163 }
Chris@42 1164 }
Chris@42 1165 {
Chris@42 1166 E TC, TJ, T6Q, T6R;
Chris@42 1167 TC = Ty + TB;
Chris@42 1168 TJ = TF + TI;
Chris@42 1169 TK = TC + TJ;
Chris@42 1170 T1j = TC - TJ;
Chris@42 1171 T6Q = T4Z - T4Y;
Chris@42 1172 T6R = KP707106781 * (T4S - T4V);
Chris@42 1173 T6S = T6Q + T6R;
Chris@42 1174 T7w = T6Q - T6R;
Chris@42 1175 }
Chris@42 1176 {
Chris@42 1177 E T6T, T6U, T1q, T1x;
Chris@42 1178 T6T = T4N + T4O;
Chris@42 1179 T6U = KP707106781 * (T51 + T52);
Chris@42 1180 T6V = T6T - T6U;
Chris@42 1181 T7v = T6T + T6U;
Chris@42 1182 T1q = T1m + T1p;
Chris@42 1183 T1x = T1t + T1w;
Chris@42 1184 T1y = T1q - T1x;
Chris@42 1185 T2B = T1q + T1x;
Chris@42 1186 }
Chris@42 1187 {
Chris@42 1188 E T3a, T3b, T4P, T4W;
Chris@42 1189 T3a = T1m - T1p;
Chris@42 1190 T3b = TF - TI;
Chris@42 1191 T3c = T3a - T3b;
Chris@42 1192 T3S = T3b + T3a;
Chris@42 1193 T4P = T4N - T4O;
Chris@42 1194 T4W = KP707106781 * (T4S + T4V);
Chris@42 1195 T4X = T4P - T4W;
Chris@42 1196 T61 = T4P + T4W;
Chris@42 1197 }
Chris@42 1198 {
Chris@42 1199 E T50, T53, T3d, T3e;
Chris@42 1200 T50 = T4Y + T4Z;
Chris@42 1201 T53 = KP707106781 * (T51 - T52);
Chris@42 1202 T54 = T50 - T53;
Chris@42 1203 T62 = T50 + T53;
Chris@42 1204 T3d = Ty - TB;
Chris@42 1205 T3e = T1w - T1t;
Chris@42 1206 T3f = T3d - T3e;
Chris@42 1207 T3T = T3d + T3e;
Chris@42 1208 }
Chris@42 1209 }
/* Load group 4: ci[0,4,8,...,28] and cr[3,7,11,...,31]. After this block every
 * input element has been read. */
Chris@42 1210 {
Chris@42 1211 E TN, T56, TQ, T5h, T1G, T57, T1D, T5i, TX, T5l, T1N, T5e, TU, T5k, T1K;
Chris@42 1212 E T5b;
Chris@42 1213 {
Chris@42 1214 E TL, TM, T1B, T1C;
Chris@42 1215 TL = ci[0];
Chris@42 1216 TM = cr[WS(rs, 15)];
Chris@42 1217 TN = TL + TM;
Chris@42 1218 T56 = TL - TM;
Chris@42 1219 {
Chris@42 1220 E TO, TP, T1E, T1F;
Chris@42 1221 TO = cr[WS(rs, 7)];
Chris@42 1222 TP = ci[WS(rs, 8)];
Chris@42 1223 TQ = TO + TP;
Chris@42 1224 T5h = TO - TP;
Chris@42 1225 T1E = ci[WS(rs, 24)];
Chris@42 1226 T1F = cr[WS(rs, 23)];
Chris@42 1227 T1G = T1E - T1F;
Chris@42 1228 T57 = T1E + T1F;
Chris@42 1229 }
Chris@42 1230 T1B = ci[WS(rs, 16)];
Chris@42 1231 T1C = cr[WS(rs, 31)];
Chris@42 1232 T1D = T1B - T1C;
Chris@42 1233 T5i = T1B + T1C;
Chris@42 1234 {
Chris@42 1235 E TV, TW, T5c, T1L, T1M, T5d;
Chris@42 1236 TV = ci[WS(rs, 4)];
Chris@42 1237 TW = cr[WS(rs, 11)];
Chris@42 1238 T5c = TV - TW;
Chris@42 1239 T1L = ci[WS(rs, 20)];
Chris@42 1240 T1M = cr[WS(rs, 27)];
Chris@42 1241 T5d = T1L + T1M;
Chris@42 1242 TX = TV + TW;
Chris@42 1243 T5l = T5c + T5d;
Chris@42 1244 T1N = T1L - T1M;
Chris@42 1245 T5e = T5c - T5d;
Chris@42 1246 }
Chris@42 1247 {
Chris@42 1248 E TS, TT, T59, T1I, T1J, T5a;
Chris@42 1249 TS = cr[WS(rs, 3)];
Chris@42 1250 TT = ci[WS(rs, 12)];
Chris@42 1251 T59 = TS - TT;
Chris@42 1252 T1I = ci[WS(rs, 28)];
Chris@42 1253 T1J = cr[WS(rs, 19)];
Chris@42 1254 T5a = T1I + T1J;
Chris@42 1255 TU = TS + TT;
Chris@42 1256 T5k = T59 + T5a;
Chris@42 1257 T1K = T1I - T1J;
Chris@42 1258 T5b = T59 - T5a;
Chris@42 1259 }
Chris@42 1260 }
Chris@42 1261 {
Chris@42 1262 E TR, TY, T6J, T6K;
Chris@42 1263 TR = TN + TQ;
Chris@42 1264 TY = TU + TX;
Chris@42 1265 TZ = TR + TY;
Chris@42 1266 T1A = TR - TY;
Chris@42 1267 T6J = KP707106781 * (T5b - T5e);
Chris@42 1268 T6K = T5h + T5i;
Chris@42 1269 T6L = T6J - T6K;
Chris@42 1270 T7z = T6K + T6J;
Chris@42 1271 }
Chris@42 1272 {
Chris@42 1273 E T6M, T6N, T1H, T1O;
Chris@42 1274 T6M = T56 + T57;
Chris@42 1275 T6N = KP707106781 * (T5k + T5l);
Chris@42 1276 T6O = T6M - T6N;
Chris@42 1277 T7y = T6M + T6N;
Chris@42 1278 T1H = T1D + T1G;
Chris@42 1279 T1O = T1K + T1N;
Chris@42 1280 T1P = T1H - T1O;
Chris@42 1281 T2C = T1H + T1O;
Chris@42 1282 }
Chris@42 1283 {
Chris@42 1284 E T33, T34, T58, T5f;
Chris@42 1285 T33 = T1D - T1G;
Chris@42 1286 T34 = TU - TX;
Chris@42 1287 T35 = T33 - T34;
Chris@42 1288 T3P = T34 + T33;
Chris@42 1289 T58 = T56 - T57;
Chris@42 1290 T5f = KP707106781 * (T5b + T5e);
Chris@42 1291 T5g = T58 - T5f;
Chris@42 1292 T64 = T58 + T5f;
Chris@42 1293 }
Chris@42 1294 {
Chris@42 1295 E T5j, T5m, T36, T37;
Chris@42 1296 T5j = T5h - T5i;
Chris@42 1297 T5m = KP707106781 * (T5k - T5l);
Chris@42 1298 T5n = T5j - T5m;
Chris@42 1299 T65 = T5j + T5m;
Chris@42 1300 T36 = TN - TQ;
Chris@42 1301 T37 = T1N - T1K;
Chris@42 1302 T38 = T36 - T37;
Chris@42 1303 T3Q = T36 + T37;
Chris@42 1304 }
Chris@42 1305 }
/* Stores begin here. Element 0 is written without any W multiplier. For every other
 * element k the pair (x, y) is combined with (W[2k-2], W[2k-1]); under the assumed
 * FMA/FNMS definitions this is cr = W[2k-2]*x - W[2k-1]*y, ci = W[2k-2]*y + W[2k-1]*x.
 * This block writes elements 0 and 16. */
Chris@42 1306 {
Chris@42 1307 E Tv, T10, T2w, T2A, T2D, T2E, T2v, T2x;
Chris@42 1308 Tv = Tf + Tu;
Chris@42 1309 T10 = TK + TZ;
Chris@42 1310 T2w = Tv - T10;
Chris@42 1311 T2A = T2y + T2z;
Chris@42 1312 T2D = T2B + T2C;
Chris@42 1313 T2E = T2A - T2D;
Chris@42 1314 cr[0] = Tv + T10;
Chris@42 1315 ci[0] = T2A + T2D;
Chris@42 1316 T2v = W[30];
Chris@42 1317 T2x = W[31];
Chris@42 1318 cr[WS(rs, 16)] = FNMS(T2x, T2E, T2v * T2w);
Chris@42 1319 ci[WS(rs, 16)] = FMA(T2x, T2w, T2v * T2E);
Chris@42 1320 }
/* Writes elements 24 and 8. */
Chris@42 1321 {
Chris@42 1322 E T2I, T2O, T2M, T2Q;
Chris@42 1323 {
Chris@42 1324 E T2G, T2H, T2K, T2L;
Chris@42 1325 T2G = Tf - Tu;
Chris@42 1326 T2H = T2C - T2B;
Chris@42 1327 T2I = T2G - T2H;
Chris@42 1328 T2O = T2G + T2H;
Chris@42 1329 T2K = T2y - T2z;
Chris@42 1330 T2L = TK - TZ;
Chris@42 1331 T2M = T2K - T2L;
Chris@42 1332 T2Q = T2L + T2K;
Chris@42 1333 }
Chris@42 1334 {
Chris@42 1335 E T2F, T2J, T2N, T2P;
Chris@42 1336 T2F = W[46];
Chris@42 1337 T2J = W[47];
Chris@42 1338 cr[WS(rs, 24)] = FNMS(T2J, T2M, T2F * T2I);
Chris@42 1339 ci[WS(rs, 24)] = FMA(T2F, T2M, T2J * T2I);
Chris@42 1340 T2N = W[14];
Chris@42 1341 T2P = W[15];
Chris@42 1342 cr[WS(rs, 8)] = FNMS(T2P, T2Q, T2N * T2O);
Chris@42 1343 ci[WS(rs, 8)] = FMA(T2N, T2Q, T2P * T2O);
Chris@42 1344 }
Chris@42 1345 }
/* Writes elements 20, 12, 4 and 28. */
Chris@42 1346 {
Chris@42 1347 E T1i, T2a, T2o, T2k, T2d, T2l, T1R, T2p;
Chris@42 1348 T1i = T12 + T1h;
Chris@42 1349 T2a = T1U + T29;
Chris@42 1350 T2o = T29 - T1U;
Chris@42 1351 T2k = T12 - T1h;
Chris@42 1352 {
Chris@42 1353 E T2b, T2c, T1z, T1Q;
Chris@42 1354 T2b = T1j + T1y;
Chris@42 1355 T2c = T1P - T1A;
Chris@42 1356 T2d = KP707106781 * (T2b + T2c);
Chris@42 1357 T2l = KP707106781 * (T2c - T2b);
Chris@42 1358 T1z = T1j - T1y;
Chris@42 1359 T1Q = T1A + T1P;
Chris@42 1360 T1R = KP707106781 * (T1z + T1Q);
Chris@42 1361 T2p = KP707106781 * (T1z - T1Q);
Chris@42 1362 }
Chris@42 1363 {
Chris@42 1364 E T1S, T2e, T11, T1T;
Chris@42 1365 T1S = T1i - T1R;
Chris@42 1366 T2e = T2a - T2d;
Chris@42 1367 T11 = W[38];
Chris@42 1368 T1T = W[39];
Chris@42 1369 cr[WS(rs, 20)] = FNMS(T1T, T2e, T11 * T1S);
Chris@42 1370 ci[WS(rs, 20)] = FMA(T1T, T1S, T11 * T2e);
Chris@42 1371 }
Chris@42 1372 {
Chris@42 1373 E T2s, T2u, T2r, T2t;
Chris@42 1374 T2s = T2k + T2l;
Chris@42 1375 T2u = T2o + T2p;
Chris@42 1376 T2r = W[22];
Chris@42 1377 T2t = W[23];
Chris@42 1378 cr[WS(rs, 12)] = FNMS(T2t, T2u, T2r * T2s);
Chris@42 1379 ci[WS(rs, 12)] = FMA(T2r, T2u, T2t * T2s);
Chris@42 1380 }
Chris@42 1381 {
Chris@42 1382 E T2g, T2i, T2f, T2h;
Chris@42 1383 T2g = T1i + T1R;
Chris@42 1384 T2i = T2a + T2d;
Chris@42 1385 T2f = W[6];
Chris@42 1386 T2h = W[7];
Chris@42 1387 cr[WS(rs, 4)] = FNMS(T2h, T2i, T2f * T2g);
Chris@42 1388 ci[WS(rs, 4)] = FMA(T2h, T2g, T2f * T2i);
Chris@42 1389 }
Chris@42 1390 {
Chris@42 1391 E T2m, T2q, T2j, T2n;
Chris@42 1392 T2m = T2k - T2l;
Chris@42 1393 T2q = T2o - T2p;
Chris@42 1394 T2j = W[54];
Chris@42 1395 T2n = W[55];
Chris@42 1396 cr[WS(rs, 28)] = FNMS(T2n, T2q, T2j * T2m);
Chris@42 1397 ci[WS(rs, 28)] = FMA(T2j, T2q, T2n * T2m);
Chris@42 1398 }
Chris@42 1399 }
/* Writes elements 26, 2, 10 and 18. */
Chris@42 1400 {
Chris@42 1401 E T3O, T4a, T40, T4e, T3V, T4f, T43, T4b, T3N, T3Z;
Chris@42 1402 T3N = KP707106781 * (T3n + T3o);
Chris@42 1403 T3O = T3M - T3N;
Chris@42 1404 T4a = T3M + T3N;
Chris@42 1405 T3Z = KP707106781 * (T30 + T2X);
Chris@42 1406 T40 = T3Y - T3Z;
Chris@42 1407 T4e = T3Y + T3Z;
Chris@42 1408 {
Chris@42 1409 E T3R, T3U, T41, T42;
Chris@42 1410 T3R = FNMS(KP382683432, T3Q, KP923879532 * T3P);
Chris@42 1411 T3U = FMA(KP923879532, T3S, KP382683432 * T3T);
Chris@42 1412 T3V = T3R - T3U;
Chris@42 1413 T4f = T3U + T3R;
Chris@42 1414 T41 = FNMS(KP382683432, T3S, KP923879532 * T3T);
Chris@42 1415 T42 = FMA(KP382683432, T3P, KP923879532 * T3Q);
Chris@42 1416 T43 = T41 - T42;
Chris@42 1417 T4b = T41 + T42;
Chris@42 1418 }
Chris@42 1419 {
Chris@42 1420 E T3W, T44, T3L, T3X;
Chris@42 1421 T3W = T3O - T3V;
Chris@42 1422 T44 = T40 - T43;
Chris@42 1423 T3L = W[50];
Chris@42 1424 T3X = W[51];
Chris@42 1425 cr[WS(rs, 26)] = FNMS(T3X, T44, T3L * T3W);
Chris@42 1426 ci[WS(rs, 26)] = FMA(T3X, T3W, T3L * T44);
Chris@42 1427 }
Chris@42 1428 {
Chris@42 1429 E T4i, T4k, T4h, T4j;
Chris@42 1430 T4i = T4a + T4b;
Chris@42 1431 T4k = T4e + T4f;
Chris@42 1432 T4h = W[2];
Chris@42 1433 T4j = W[3];
Chris@42 1434 cr[WS(rs, 2)] = FNMS(T4j, T4k, T4h * T4i);
Chris@42 1435 ci[WS(rs, 2)] = FMA(T4h, T4k, T4j * T4i);
Chris@42 1436 }
Chris@42 1437 {
Chris@42 1438 E T46, T48, T45, T47;
Chris@42 1439 T46 = T3O + T3V;
Chris@42 1440 T48 = T40 + T43;
Chris@42 1441 T45 = W[18];
Chris@42 1442 T47 = W[19];
Chris@42 1443 cr[WS(rs, 10)] = FNMS(T47, T48, T45 * T46);
Chris@42 1444 ci[WS(rs, 10)] = FMA(T47, T46, T45 * T48);
Chris@42 1445 }
Chris@42 1446 {
Chris@42 1447 E T4c, T4g, T49, T4d;
Chris@42 1448 T4c = T4a - T4b;
Chris@42 1449 T4g = T4e - T4f;
Chris@42 1450 T49 = W[34];
Chris@42 1451 T4d = W[35];
Chris@42 1452 cr[WS(rs, 18)] = FNMS(T4d, T4g, T49 * T4c);
Chris@42 1453 ci[WS(rs, 18)] = FMA(T49, T4g, T4d * T4c);
Chris@42 1454 }
Chris@42 1455 }
/* Writes elements 30, 6, 14 and 22. */
Chris@42 1456 {
Chris@42 1457 E T32, T3A, T3q, T3E, T3h, T3F, T3t, T3B, T31, T3p;
Chris@42 1458 T31 = KP707106781 * (T2X - T30);
Chris@42 1459 T32 = T2U - T31;
Chris@42 1460 T3A = T2U + T31;
Chris@42 1461 T3p = KP707106781 * (T3n - T3o);
Chris@42 1462 T3q = T3m - T3p;
Chris@42 1463 T3E = T3m + T3p;
Chris@42 1464 {
Chris@42 1465 E T39, T3g, T3r, T3s;
Chris@42 1466 T39 = FNMS(KP923879532, T38, KP382683432 * T35);
Chris@42 1467 T3g = FMA(KP382683432, T3c, KP923879532 * T3f);
Chris@42 1468 T3h = T39 - T3g;
Chris@42 1469 T3F = T3g + T39;
Chris@42 1470 T3r = FNMS(KP923879532, T3c, KP382683432 * T3f);
Chris@42 1471 T3s = FMA(KP923879532, T35, KP382683432 * T38);
Chris@42 1472 T3t = T3r - T3s;
Chris@42 1473 T3B = T3r + T3s;
Chris@42 1474 }
Chris@42 1475 {
Chris@42 1476 E T3i, T3u, T2R, T3j;
Chris@42 1477 T3i = T32 - T3h;
Chris@42 1478 T3u = T3q - T3t;
Chris@42 1479 T2R = W[58];
Chris@42 1480 T3j = W[59];
Chris@42 1481 cr[WS(rs, 30)] = FNMS(T3j, T3u, T2R * T3i);
Chris@42 1482 ci[WS(rs, 30)] = FMA(T3j, T3i, T2R * T3u);
Chris@42 1483 }
Chris@42 1484 {
Chris@42 1485 E T3I, T3K, T3H, T3J;
Chris@42 1486 T3I = T3A + T3B;
Chris@42 1487 T3K = T3E + T3F;
Chris@42 1488 T3H = W[10];
Chris@42 1489 T3J = W[11];
Chris@42 1490 cr[WS(rs, 6)] = FNMS(T3J, T3K, T3H * T3I);
Chris@42 1491 ci[WS(rs, 6)] = FMA(T3H, T3K, T3J * T3I);
Chris@42 1492 }
Chris@42 1493 {
Chris@42 1494 E T3w, T3y, T3v, T3x;
Chris@42 1495 T3w = T32 + T3h;
Chris@42 1496 T3y = T3q + T3t;
Chris@42 1497 T3v = W[26];
Chris@42 1498 T3x = W[27];
Chris@42 1499 cr[WS(rs, 14)] = FNMS(T3x, T3y, T3v * T3w);
Chris@42 1500 ci[WS(rs, 14)] = FMA(T3x, T3w, T3v * T3y);
Chris@42 1501 }
Chris@42 1502 {
Chris@42 1503 E T3C, T3G, T3z, T3D;
Chris@42 1504 T3C = T3A - T3B;
Chris@42 1505 T3G = T3E - T3F;
Chris@42 1506 T3z = W[42];
Chris@42 1507 T3D = W[43];
Chris@42 1508 cr[WS(rs, 22)] = FNMS(T3D, T3G, T3z * T3C);
Chris@42 1509 ci[WS(rs, 22)] = FMA(T3z, T3G, T3D * T3C);
Chris@42 1510 }
Chris@42 1511 }
/* Writes elements 17, 9, 1 and 25 (uses the KP980785280 / KP195090322 pair). */
Chris@42 1512 {
Chris@42 1513 E T60, T6m, T6f, T6n, T67, T6r, T6c, T6q;
Chris@42 1514 {
Chris@42 1515 E T5Y, T5Z, T6d, T6e;
Chris@42 1516 T5Y = T4o + T4v;
Chris@42 1517 T5Z = T5z + T5A;
Chris@42 1518 T60 = T5Y + T5Z;
Chris@42 1519 T6m = T5Y - T5Z;
Chris@42 1520 T6d = FMA(KP195090322, T61, KP980785280 * T62);
Chris@42 1521 T6e = FNMS(KP195090322, T64, KP980785280 * T65);
Chris@42 1522 T6f = T6d + T6e;
Chris@42 1523 T6n = T6e - T6d;
Chris@42 1524 }
Chris@42 1525 {
Chris@42 1526 E T63, T66, T6a, T6b;
Chris@42 1527 T63 = FNMS(KP195090322, T62, KP980785280 * T61);
Chris@42 1528 T66 = FMA(KP980785280, T64, KP195090322 * T65);
Chris@42 1529 T67 = T63 + T66;
Chris@42 1530 T6r = T63 - T66;
Chris@42 1531 T6a = T5u + T5x;
Chris@42 1532 T6b = T4K + T4D;
Chris@42 1533 T6c = T6a + T6b;
Chris@42 1534 T6q = T6a - T6b;
Chris@42 1535 }
Chris@42 1536 {
Chris@42 1537 E T68, T6g, T5X, T69;
Chris@42 1538 T68 = T60 - T67;
Chris@42 1539 T6g = T6c - T6f;
Chris@42 1540 T5X = W[32];
Chris@42 1541 T69 = W[33];
Chris@42 1542 cr[WS(rs, 17)] = FNMS(T69, T6g, T5X * T68);
Chris@42 1543 ci[WS(rs, 17)] = FMA(T69, T68, T5X * T6g);
Chris@42 1544 }
Chris@42 1545 {
Chris@42 1546 E T6u, T6w, T6t, T6v;
Chris@42 1547 T6u = T6m + T6n;
Chris@42 1548 T6w = T6q + T6r;
Chris@42 1549 T6t = W[16];
Chris@42 1550 T6v = W[17];
Chris@42 1551 cr[WS(rs, 9)] = FNMS(T6v, T6w, T6t * T6u);
Chris@42 1552 ci[WS(rs, 9)] = FMA(T6t, T6w, T6v * T6u);
Chris@42 1553 }
Chris@42 1554 {
Chris@42 1555 E T6i, T6k, T6h, T6j;
Chris@42 1556 T6i = T60 + T67;
Chris@42 1557 T6k = T6c + T6f;
Chris@42 1558 T6h = W[0];
Chris@42 1559 T6j = W[1];
Chris@42 1560 cr[WS(rs, 1)] = FNMS(T6j, T6k, T6h * T6i);
Chris@42 1561 ci[WS(rs, 1)] = FMA(T6j, T6i, T6h * T6k);
Chris@42 1562 }
Chris@42 1563 {
Chris@42 1564 E T6o, T6s, T6l, T6p;
Chris@42 1565 T6o = T6m - T6n;
Chris@42 1566 T6s = T6q - T6r;
Chris@42 1567 T6l = W[48];
Chris@42 1568 T6p = W[49];
Chris@42 1569 cr[WS(rs, 25)] = FNMS(T6p, T6s, T6l * T6o);
Chris@42 1570 ci[WS(rs, 25)] = FMA(T6l, T6s, T6p * T6o);
Chris@42 1571 }
Chris@42 1572 }
/* Writes elements 23, 31, 7 and 15. */
Chris@42 1573 {
Chris@42 1574 E T7u, T7Q, T7J, T7R, T7B, T7V, T7G, T7U;
Chris@42 1575 {
Chris@42 1576 E T7s, T7t, T7H, T7I;
Chris@42 1577 T7s = T6y + T6z;
Chris@42 1578 T7t = T73 + T74;
Chris@42 1579 T7u = T7s - T7t;
Chris@42 1580 T7Q = T7s + T7t;
Chris@42 1581 T7H = FMA(KP195090322, T7w, KP980785280 * T7v);
Chris@42 1582 T7I = FMA(KP195090322, T7z, KP980785280 * T7y);
Chris@42 1583 T7J = T7H - T7I;
Chris@42 1584 T7R = T7H + T7I;
Chris@42 1585 }
Chris@42 1586 {
Chris@42 1587 E T7x, T7A, T7E, T7F;
Chris@42 1588 T7x = FNMS(KP980785280, T7w, KP195090322 * T7v);
Chris@42 1589 T7A = FNMS(KP980785280, T7z, KP195090322 * T7y);
Chris@42 1590 T7B = T7x + T7A;
Chris@42 1591 T7V = T7x - T7A;
Chris@42 1592 T7E = T70 - T71;
Chris@42 1593 T7F = T6D - T6G;
Chris@42 1594 T7G = T7E + T7F;
Chris@42 1595 T7U = T7E - T7F;
Chris@42 1596 }
Chris@42 1597 {
Chris@42 1598 E T7C, T7K, T7r, T7D;
Chris@42 1599 T7C = T7u - T7B;
Chris@42 1600 T7K = T7G - T7J;
Chris@42 1601 T7r = W[44];
Chris@42 1602 T7D = W[45];
Chris@42 1603 cr[WS(rs, 23)] = FNMS(T7D, T7K, T7r * T7C);
Chris@42 1604 ci[WS(rs, 23)] = FMA(T7D, T7C, T7r * T7K);
Chris@42 1605 }
Chris@42 1606 {
Chris@42 1607 E T7Y, T80, T7X, T7Z;
Chris@42 1608 T7Y = T7Q + T7R;
Chris@42 1609 T80 = T7U - T7V;
Chris@42 1610 T7X = W[60];
Chris@42 1611 T7Z = W[61];
Chris@42 1612 cr[WS(rs, 31)] = FNMS(T7Z, T80, T7X * T7Y);
Chris@42 1613 ci[WS(rs, 31)] = FMA(T7X, T80, T7Z * T7Y);
Chris@42 1614 }
Chris@42 1615 {
Chris@42 1616 E T7M, T7O, T7L, T7N;
Chris@42 1617 T7M = T7u + T7B;
Chris@42 1618 T7O = T7G + T7J;
Chris@42 1619 T7L = W[12];
Chris@42 1620 T7N = W[13];
Chris@42 1621 cr[WS(rs, 7)] = FNMS(T7N, T7O, T7L * T7M);
Chris@42 1622 ci[WS(rs, 7)] = FMA(T7N, T7M, T7L * T7O);
Chris@42 1623 }
Chris@42 1624 {
Chris@42 1625 E T7S, T7W, T7P, T7T;
Chris@42 1626 T7S = T7Q - T7R;
Chris@42 1627 T7W = T7U + T7V;
Chris@42 1628 T7P = W[28];
Chris@42 1629 T7T = W[29];
Chris@42 1630 cr[WS(rs, 15)] = FNMS(T7T, T7W, T7P * T7S);
Chris@42 1631 ci[WS(rs, 15)] = FMA(T7P, T7W, T7T * T7S);
Chris@42 1632 }
Chris@42 1633 }
/* Writes elements 21, 13, 5 and 29 (uses the KP831469612 / KP555570233 pair). */
Chris@42 1634 {
Chris@42 1635 E T4M, T5M, T5F, T5N, T5p, T5R, T5C, T5Q;
Chris@42 1636 {
Chris@42 1637 E T4w, T4L, T5D, T5E;
Chris@42 1638 T4w = T4o - T4v;
Chris@42 1639 T4L = T4D - T4K;
Chris@42 1640 T4M = T4w + T4L;
Chris@42 1641 T5M = T4w - T4L;
Chris@42 1642 T5D = FMA(KP831469612, T4X, KP555570233 * T54);
Chris@42 1643 T5E = FNMS(KP831469612, T5g, KP555570233 * T5n);
Chris@42 1644 T5F = T5D + T5E;
Chris@42 1645 T5N = T5E - T5D;
Chris@42 1646 }
Chris@42 1647 {
Chris@42 1648 E T55, T5o, T5y, T5B;
Chris@42 1649 T55 = FNMS(KP831469612, T54, KP555570233 * T4X);
Chris@42 1650 T5o = FMA(KP555570233, T5g, KP831469612 * T5n);
Chris@42 1651 T5p = T55 + T5o;
Chris@42 1652 T5R = T55 - T5o;
Chris@42 1653 T5y = T5u - T5x;
Chris@42 1654 T5B = T5z - T5A;
Chris@42 1655 T5C = T5y + T5B;
Chris@42 1656 T5Q = T5y - T5B;
Chris@42 1657 }
Chris@42 1658 {
Chris@42 1659 E T5q, T5G, T4l, T5r;
Chris@42 1660 T5q = T4M - T5p;
Chris@42 1661 T5G = T5C - T5F;
Chris@42 1662 T4l = W[40];
Chris@42 1663 T5r = W[41];
Chris@42 1664 cr[WS(rs, 21)] = FNMS(T5r, T5G, T4l * T5q);
Chris@42 1665 ci[WS(rs, 21)] = FMA(T5r, T5q, T4l * T5G);
Chris@42 1666 }
Chris@42 1667 {
Chris@42 1668 E T5U, T5W, T5T, T5V;
Chris@42 1669 T5U = T5M + T5N;
Chris@42 1670 T5W = T5Q + T5R;
Chris@42 1671 T5T = W[24];
Chris@42 1672 T5V = W[25];
Chris@42 1673 cr[WS(rs, 13)] = FNMS(T5V, T5W, T5T * T5U);
Chris@42 1674 ci[WS(rs, 13)] = FMA(T5T, T5W, T5V * T5U);
Chris@42 1675 }
Chris@42 1676 {
Chris@42 1677 E T5I, T5K, T5H, T5J;
Chris@42 1678 T5I = T4M + T5p;
Chris@42 1679 T5K = T5C + T5F;
Chris@42 1680 T5H = W[8];
Chris@42 1681 T5J = W[9];
Chris@42 1682 cr[WS(rs, 5)] = FNMS(T5J, T5K, T5H * T5I);
Chris@42 1683 ci[WS(rs, 5)] = FMA(T5J, T5I, T5H * T5K);
Chris@42 1684 }
Chris@42 1685 {
Chris@42 1686 E T5O, T5S, T5L, T5P;
Chris@42 1687 T5O = T5M - T5N;
Chris@42 1688 T5S = T5Q - T5R;
Chris@42 1689 T5L = W[56];
Chris@42 1690 T5P = W[57];
Chris@42 1691 cr[WS(rs, 29)] = FNMS(T5P, T5S, T5L * T5O);
Chris@42 1692 ci[WS(rs, 29)] = FMA(T5L, T5S, T5P * T5O);
Chris@42 1693 }
Chris@42 1694 }
/* Writes elements 27, 3, 11 and 19; after this block all 32 elements are stored. */
Chris@42 1695 {
Chris@42 1696 E T6I, T7g, T79, T7h, T6X, T7l, T76, T7k;
Chris@42 1697 {
Chris@42 1698 E T6A, T6H, T77, T78;
Chris@42 1699 T6A = T6y - T6z;
Chris@42 1700 T6H = T6D + T6G;
Chris@42 1701 T6I = T6A - T6H;
Chris@42 1702 T7g = T6A + T6H;
Chris@42 1703 T77 = FNMS(KP555570233, T6S, KP831469612 * T6V);
Chris@42 1704 T78 = FMA(KP555570233, T6L, KP831469612 * T6O);
Chris@42 1705 T79 = T77 - T78;
Chris@42 1706 T7h = T77 + T78;
Chris@42 1707 }
Chris@42 1708 {
Chris@42 1709 E T6P, T6W, T72, T75;
Chris@42 1710 T6P = FNMS(KP555570233, T6O, KP831469612 * T6L);
Chris@42 1711 T6W = FMA(KP831469612, T6S, KP555570233 * T6V);
Chris@42 1712 T6X = T6P - T6W;
Chris@42 1713 T7l = T6W + T6P;
Chris@42 1714 T72 = T70 + T71;
Chris@42 1715 T75 = T73 - T74;
Chris@42 1716 T76 = T72 - T75;
Chris@42 1717 T7k = T72 + T75;
Chris@42 1718 }
Chris@42 1719 {
Chris@42 1720 E T6Y, T7a, T6x, T6Z;
Chris@42 1721 T6Y = T6I - T6X;
Chris@42 1722 T7a = T76 - T79;
Chris@42 1723 T6x = W[52];
Chris@42 1724 T6Z = W[53];
Chris@42 1725 cr[WS(rs, 27)] = FNMS(T6Z, T7a, T6x * T6Y);
Chris@42 1726 ci[WS(rs, 27)] = FMA(T6Z, T6Y, T6x * T7a);
Chris@42 1727 }
Chris@42 1728 {
Chris@42 1729 E T7o, T7q, T7n, T7p;
Chris@42 1730 T7o = T7g + T7h;
Chris@42 1731 T7q = T7k + T7l;
Chris@42 1732 T7n = W[4];
Chris@42 1733 T7p = W[5];
Chris@42 1734 cr[WS(rs, 3)] = FNMS(T7p, T7q, T7n * T7o);
Chris@42 1735 ci[WS(rs, 3)] = FMA(T7n, T7q, T7p * T7o);
Chris@42 1736 }
Chris@42 1737 {
Chris@42 1738 E T7c, T7e, T7b, T7d;
Chris@42 1739 T7c = T6I + T6X;
Chris@42 1740 T7e = T76 + T79;
Chris@42 1741 T7b = W[20];
Chris@42 1742 T7d = W[21];
Chris@42 1743 cr[WS(rs, 11)] = FNMS(T7d, T7e, T7b * T7c);
Chris@42 1744 ci[WS(rs, 11)] = FMA(T7d, T7c, T7b * T7e);
Chris@42 1745 }
Chris@42 1746 {
Chris@42 1747 E T7i, T7m, T7f, T7j;
Chris@42 1748 T7i = T7g - T7h;
Chris@42 1749 T7m = T7k - T7l;
Chris@42 1750 T7f = W[36];
Chris@42 1751 T7j = W[37];
Chris@42 1752 cr[WS(rs, 19)] = FNMS(T7j, T7m, T7f * T7i);
Chris@42 1753 ci[WS(rs, 19)] = FMA(T7f, T7m, T7j * T7i);
Chris@42 1754 }
Chris@42 1755 }
Chris@42 1756 }
Chris@42 1757 }
Chris@42 1758 }
Chris@42 1759
/* Twiddle instruction list handed to the descriptor `desc` in this (non-FMA) branch. */
/* NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this file; the two
 * entries look like "full twiddle set for size 32" followed by a list terminator --
 * confirm. The kernel in this branch reads W[0]..W[61] per loop iteration. */
Chris@42 1760 static const tw_instr twinstr[] = {
Chris@42 1761 {TW_FULL, 1, 32},
Chris@42 1762 {TW_NEXT, 1, 0}
Chris@42 1763 };
Chris@42 1764
/* Codelet descriptor for the non-FMA build: 32, the name "hb_32", the twinstr table,
 * &GENUS, and the counts {340, 114, 94, 0}. The first three counts equal the
 * additions / multiplications / fused multiply-adds quoted in this branch's generated
 * header comment. NOTE(review): field meanings come from hc2hc_desc, which is declared
 * elsewhere; 32 is presumably the codelet radix -- confirm. */
Chris@42 1765 static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {340, 114, 94, 0} };
Chris@42 1766
/* Registration entry point: hands the planner p, the hb_32 kernel and its descriptor
 * to X(khc2hc_register). X(...) is a macro defined outside this file (presumably a
 * name-prefixing wrapper -- confirm). */
Chris@42 1767 void X(codelet_hb_32) (planner *p) {
Chris@42 1768 X(khc2hc_register) (p, hb_32, &desc);
Chris@42 1769 }
Chris@42 1770 #endif /* HAVE_FMA */