annotate src/fftw-3.3.5/rdft/scalar/r2cb/hc2cb_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:51:31 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include hc2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@42 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@42 33 * 137 stack variables, 7 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cb.h"
Chris@42 36
/*
 * hc2cb_32 (HAVE_FMA variant): machine-generated codelet; per the generator
 * command line quoted above it was produced with "-n 32 -dif -sign 1".
 *
 * For every m in [mb, me) the loop body
 *   - reads 16 values from each of Rp, Ip, Rm and Im, at offsets
 *     WS(rs, 0) .. WS(rs, 15),
 *   - combines them using the seven DK() constants and the 62 twiddle
 *     values W[0] .. W[61],
 *   - stores 16 results back into each of the same four arrays.
 *
 * Parameters:
 *   Rp, Ip  arrays read and written; advanced by +ms after each iteration
 *   Rm, Im  arrays read and written; advanced by -ms after each iteration
 *   W       twiddle table (read only); offset by (mb - 1) * 62 before the
 *           first iteration and advanced by 62 after each iteration
 *   rs      stride handed to WS() to form the element offsets
 *   mb, me  first iteration index and one-past-the-last iteration index
 *   ms      per-iteration pointer step for Rp/Ip (+ms) and Rm/Im (-ms)
 *
 * NOTE(review): the statement order was chosen by the generator's scheduler
 * and the header says DO NOT EDIT; regenerate rather than hand-modify.
 */
Chris@42 37 static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 46 {
Chris@42 47 INT m;
/* One pass per m: Rp/Ip step forward by ms, Rm/Im step backward by ms, W steps by 62. */
Chris@42 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* These six temporaries are declared at loop scope because the final Im/Ip stores at offset 3 happen after the nested block closes. */
Chris@42 49 E T5o, T5r, T5q, T5n, T5s, T5p;
Chris@42 50 {
Chris@42 51 E T5K, Tf, T8k, T7k, T8x, T7N, T3i, T1i, T3v, T2L, T5f, T4v, T6T, T6m, T52;
Chris@42 52 E T42, TZ, T6X, T3p, T1X, T8B, T8p, T3o, T26, T58, T4n, T7T, T7z, T59, T4k;
Chris@42 53 E T6p, T6a, TK, T6W, T8s, T8A, T2o, T3m, T3l, T2x, T55, T4g, T7S, T7G, T56;
Chris@42 54 E T4d, T6o, T61, T5Q, T5N, T6f, Tu, T8y, T7r, T8l, T7Q, T3w, T1F, T45, T48;
Chris@42 55 E T3j, T2O, T53, T4y, T62, T69;
Chris@42 56 {
Chris@42 57 E T6l, T6i, T40, T41;
Chris@42 58 {
Chris@42 59 E T12, T3, T6g, T2G, T2D, T6, T6h, T15, Td, T6k, T1g, T2J, Ta, T17, T1a;
Chris@42 60 E T6j;
Chris@42 61 {
Chris@42 62 E T4, T5, T13, T14;
Chris@42 63 {
Chris@42 64 E T1, T2, T2E, T2F;
Chris@42 65 T1 = Rp[0];
Chris@42 66 T2 = Rm[WS(rs, 15)];
Chris@42 67 T2E = Ip[0];
Chris@42 68 T2F = Im[WS(rs, 15)];
Chris@42 69 T4 = Rp[WS(rs, 8)];
Chris@42 70 T12 = T1 - T2;
Chris@42 71 T3 = T1 + T2;
Chris@42 72 T6g = T2E - T2F;
Chris@42 73 T2G = T2E + T2F;
Chris@42 74 T5 = Rm[WS(rs, 7)];
Chris@42 75 }
Chris@42 76 T13 = Ip[WS(rs, 8)];
Chris@42 77 T14 = Im[WS(rs, 7)];
Chris@42 78 {
Chris@42 79 E Tb, Tc, T1d, T1e;
Chris@42 80 Tb = Rm[WS(rs, 3)];
Chris@42 81 T2D = T4 - T5;
Chris@42 82 T6 = T4 + T5;
Chris@42 83 T6h = T13 - T14;
Chris@42 84 T15 = T13 + T14;
Chris@42 85 Tc = Rp[WS(rs, 12)];
Chris@42 86 T1d = Ip[WS(rs, 12)];
Chris@42 87 T1e = Im[WS(rs, 3)];
Chris@42 88 {
Chris@42 89 E T8, T1c, T1f, T9, T18, T19;
Chris@42 90 T8 = Rp[WS(rs, 4)];
Chris@42 91 Td = Tb + Tc;
Chris@42 92 T1c = Tb - Tc;
Chris@42 93 T6k = T1d - T1e;
Chris@42 94 T1f = T1d + T1e;
Chris@42 95 T9 = Rm[WS(rs, 11)];
Chris@42 96 T18 = Ip[WS(rs, 4)];
Chris@42 97 T19 = Im[WS(rs, 11)];
Chris@42 98 T1g = T1c - T1f;
Chris@42 99 T2J = T1c + T1f;
Chris@42 100 Ta = T8 + T9;
Chris@42 101 T17 = T8 - T9;
Chris@42 102 T1a = T18 + T19;
Chris@42 103 T6j = T18 - T19;
Chris@42 104 }
Chris@42 105 }
Chris@42 106 }
Chris@42 107 {
Chris@42 108 E T2I, T7M, T7L, T16, T1h, T4u, T4t, T2H, T2K;
Chris@42 109 {
Chris@42 110 E T7i, T7, T1b, Te, T7j;
Chris@42 111 T7i = T3 - T6;
Chris@42 112 T7 = T3 + T6;
Chris@42 113 T2I = T17 + T1a;
Chris@42 114 T1b = T17 - T1a;
Chris@42 115 Te = Ta + Td;
Chris@42 116 T7M = Ta - Td;
Chris@42 117 T7j = T6k - T6j;
Chris@42 118 T6l = T6j + T6k;
Chris@42 119 T6i = T6g + T6h;
Chris@42 120 T7L = T6g - T6h;
Chris@42 121 T5K = T7 - Te;
Chris@42 122 Tf = T7 + Te;
Chris@42 123 T8k = T7i + T7j;
Chris@42 124 T7k = T7i - T7j;
Chris@42 125 T40 = T12 + T15;
Chris@42 126 T16 = T12 - T15;
Chris@42 127 T1h = T1b + T1g;
Chris@42 128 T4u = T1b - T1g;
Chris@42 129 }
Chris@42 130 T4t = T2G - T2D;
Chris@42 131 T2H = T2D + T2G;
Chris@42 132 T8x = T7M + T7L;
Chris@42 133 T7N = T7L - T7M;
Chris@42 134 T3i = FMA(KP707106781, T1h, T16);
Chris@42 135 T1i = FNMS(KP707106781, T1h, T16);
Chris@42 136 T2K = T2I - T2J;
Chris@42 137 T41 = T2I + T2J;
Chris@42 138 T3v = FMA(KP707106781, T2K, T2H);
Chris@42 139 T2L = FNMS(KP707106781, T2K, T2H);
Chris@42 140 T5f = FNMS(KP707106781, T4u, T4t);
Chris@42 141 T4v = FMA(KP707106781, T4u, T4t);
Chris@42 142 }
Chris@42 143 }
Chris@42 144 {
Chris@42 145 E T1Y, T1H, TR, T7w, T1K, T21, T65, T7t, TU, T66, T23, T1Q, T1R, TX, T67;
Chris@42 146 E T1U, TY, T7u;
Chris@42 147 {
Chris@42 148 E TL, TM, TO, TP, T63, T64;
Chris@42 149 TL = Rm[0];
Chris@42 150 T6T = T6i + T6l;
Chris@42 151 T6m = T6i - T6l;
Chris@42 152 T52 = FMA(KP707106781, T41, T40);
Chris@42 153 T42 = FNMS(KP707106781, T41, T40);
Chris@42 154 TM = Rp[WS(rs, 15)];
Chris@42 155 TO = Rp[WS(rs, 7)];
Chris@42 156 TP = Rm[WS(rs, 8)];
Chris@42 157 {
Chris@42 158 E T1I, TN, TQ, T1J, T1Z, T20;
Chris@42 159 T1I = Ip[WS(rs, 15)];
Chris@42 160 T1Y = TL - TM;
Chris@42 161 TN = TL + TM;
Chris@42 162 T1H = TO - TP;
Chris@42 163 TQ = TO + TP;
Chris@42 164 T1J = Im[0];
Chris@42 165 T1Z = Ip[WS(rs, 7)];
Chris@42 166 T20 = Im[WS(rs, 8)];
Chris@42 167 TR = TN + TQ;
Chris@42 168 T7w = TN - TQ;
Chris@42 169 T1K = T1I + T1J;
Chris@42 170 T63 = T1I - T1J;
Chris@42 171 T64 = T1Z - T20;
Chris@42 172 T21 = T1Z + T20;
Chris@42 173 }
Chris@42 174 {
Chris@42 175 E TV, T1M, T1P, TW, T1S, T1T;
Chris@42 176 {
Chris@42 177 E TS, TT, T1N, T1O;
Chris@42 178 TS = Rp[WS(rs, 3)];
Chris@42 179 T65 = T63 + T64;
Chris@42 180 T7t = T63 - T64;
Chris@42 181 TT = Rm[WS(rs, 12)];
Chris@42 182 T1N = Ip[WS(rs, 3)];
Chris@42 183 T1O = Im[WS(rs, 12)];
Chris@42 184 TV = Rm[WS(rs, 4)];
Chris@42 185 T1M = TS - TT;
Chris@42 186 TU = TS + TT;
Chris@42 187 T66 = T1N - T1O;
Chris@42 188 T1P = T1N + T1O;
Chris@42 189 TW = Rp[WS(rs, 11)];
Chris@42 190 T1S = Ip[WS(rs, 11)];
Chris@42 191 T1T = Im[WS(rs, 4)];
Chris@42 192 }
Chris@42 193 T23 = T1M - T1P;
Chris@42 194 T1Q = T1M + T1P;
Chris@42 195 T1R = TV - TW;
Chris@42 196 TX = TV + TW;
Chris@42 197 T67 = T1S - T1T;
Chris@42 198 T1U = T1S + T1T;
Chris@42 199 }
Chris@42 200 }
Chris@42 201 TY = TU + TX;
Chris@42 202 T7u = TU - TX;
Chris@42 203 {
Chris@42 204 E T7x, T68, T1V, T24;
Chris@42 205 T7x = T67 - T66;
Chris@42 206 T68 = T66 + T67;
Chris@42 207 T1V = T1R + T1U;
Chris@42 208 T24 = T1R - T1U;
Chris@42 209 {
Chris@42 210 E T4l, T1L, T1W, T4j, T7v, T8n, T8o, T7y;
Chris@42 211 T62 = TR - TY;
Chris@42 212 TZ = TR + TY;
Chris@42 213 T6X = T65 + T68;
Chris@42 214 T69 = T65 - T68;
Chris@42 215 T4l = T1H + T1K;
Chris@42 216 T1L = T1H - T1K;
Chris@42 217 T1W = T1Q - T1V;
Chris@42 218 T4j = T1Q + T1V;
Chris@42 219 T7v = T7t - T7u;
Chris@42 220 T8n = T7u + T7t;
Chris@42 221 T8o = T7w + T7x;
Chris@42 222 T7y = T7w - T7x;
Chris@42 223 {
Chris@42 224 E T4i, T22, T25, T4m;
Chris@42 225 T4i = T1Y + T21;
Chris@42 226 T22 = T1Y - T21;
Chris@42 227 T3p = FMA(KP707106781, T1W, T1L);
Chris@42 228 T1X = FNMS(KP707106781, T1W, T1L);
Chris@42 229 T8B = FMA(KP414213562, T8n, T8o);
Chris@42 230 T8p = FNMS(KP414213562, T8o, T8n);
Chris@42 231 T25 = T23 + T24;
Chris@42 232 T4m = T23 - T24;
Chris@42 233 T3o = FMA(KP707106781, T25, T22);
Chris@42 234 T26 = FNMS(KP707106781, T25, T22);
Chris@42 235 T58 = FMA(KP707106781, T4m, T4l);
Chris@42 236 T4n = FNMS(KP707106781, T4m, T4l);
Chris@42 237 T7T = FNMS(KP414213562, T7v, T7y);
Chris@42 238 T7z = FMA(KP414213562, T7y, T7v);
Chris@42 239 T59 = FMA(KP707106781, T4j, T4i);
Chris@42 240 T4k = FNMS(KP707106781, T4j, T4i);
Chris@42 241 }
Chris@42 242 }
Chris@42 243 }
Chris@42 244 }
Chris@42 245 }
Chris@42 246 {
Chris@42 247 E T5T, T60, T4c, T4b;
Chris@42 248 {
Chris@42 249 E T2p, T28, T2b, T7D, TC, T2s, T7A, T5W, TF, T2j, T5X, T2i, TI, T2k, T2u;
Chris@42 250 E T2h;
Chris@42 251 {
Chris@42 252 E Tz, Ty, TA, Tw, Tx;
Chris@42 253 Tw = Rp[WS(rs, 1)];
Chris@42 254 Tx = Rm[WS(rs, 14)];
Chris@42 255 Tz = Rp[WS(rs, 9)];
Chris@42 256 T6p = T69 - T62;
Chris@42 257 T6a = T62 + T69;
Chris@42 258 Ty = Tw + Tx;
Chris@42 259 T2p = Tw - Tx;
Chris@42 260 TA = Rm[WS(rs, 6)];
Chris@42 261 {
Chris@42 262 E T5U, T5V, T2d, T2g;
Chris@42 263 {
Chris@42 264 E T2q, T2r, T29, T2a, TB;
Chris@42 265 T29 = Ip[WS(rs, 1)];
Chris@42 266 T2a = Im[WS(rs, 14)];
Chris@42 267 TB = Tz + TA;
Chris@42 268 T28 = Tz - TA;
Chris@42 269 T2q = Ip[WS(rs, 9)];
Chris@42 270 T5U = T29 - T2a;
Chris@42 271 T2b = T29 + T2a;
Chris@42 272 T2r = Im[WS(rs, 6)];
Chris@42 273 T7D = Ty - TB;
Chris@42 274 TC = Ty + TB;
Chris@42 275 T2s = T2q + T2r;
Chris@42 276 T5V = T2q - T2r;
Chris@42 277 }
Chris@42 278 {
Chris@42 279 E T2e, T2f, TD, TE, TG, TH;
Chris@42 280 TD = Rp[WS(rs, 5)];
Chris@42 281 TE = Rm[WS(rs, 10)];
Chris@42 282 T7A = T5U - T5V;
Chris@42 283 T5W = T5U + T5V;
Chris@42 284 T2e = Ip[WS(rs, 5)];
Chris@42 285 T2d = TD - TE;
Chris@42 286 TF = TD + TE;
Chris@42 287 T2f = Im[WS(rs, 10)];
Chris@42 288 TG = Rm[WS(rs, 2)];
Chris@42 289 TH = Rp[WS(rs, 13)];
Chris@42 290 T2j = Ip[WS(rs, 13)];
Chris@42 291 T5X = T2e - T2f;
Chris@42 292 T2g = T2e + T2f;
Chris@42 293 T2i = TG - TH;
Chris@42 294 TI = TG + TH;
Chris@42 295 T2k = Im[WS(rs, 2)];
Chris@42 296 }
Chris@42 297 T2u = T2d - T2g;
Chris@42 298 T2h = T2d + T2g;
Chris@42 299 }
Chris@42 300 }
Chris@42 301 {
Chris@42 302 E TJ, T7B, T2l, T5Y;
Chris@42 303 TJ = TF + TI;
Chris@42 304 T7B = TF - TI;
Chris@42 305 T2l = T2j + T2k;
Chris@42 306 T5Y = T2j - T2k;
Chris@42 307 {
Chris@42 308 E T4e, T2c, T2v, T8q, T7C, T7F, T8r, T2n, T7E, T2m, T5Z, T4f, T2t, T2w;
Chris@42 309 T4e = T2b - T28;
Chris@42 310 T2c = T28 + T2b;
Chris@42 311 TK = TC + TJ;
Chris@42 312 T5T = TC - TJ;
Chris@42 313 T7E = T5Y - T5X;
Chris@42 314 T5Z = T5X + T5Y;
Chris@42 315 T2m = T2i + T2l;
Chris@42 316 T2v = T2i - T2l;
Chris@42 317 T60 = T5W - T5Z;
Chris@42 318 T6W = T5W + T5Z;
Chris@42 319 T8q = T7B + T7A;
Chris@42 320 T7C = T7A - T7B;
Chris@42 321 T7F = T7D - T7E;
Chris@42 322 T8r = T7D + T7E;
Chris@42 323 T2n = T2h - T2m;
Chris@42 324 T4c = T2h + T2m;
Chris@42 325 T4b = T2p + T2s;
Chris@42 326 T2t = T2p - T2s;
Chris@42 327 T2w = T2u + T2v;
Chris@42 328 T4f = T2v - T2u;
Chris@42 329 T8s = FMA(KP414213562, T8r, T8q);
Chris@42 330 T8A = FNMS(KP414213562, T8q, T8r);
Chris@42 331 T2o = FNMS(KP707106781, T2n, T2c);
Chris@42 332 T3m = FMA(KP707106781, T2n, T2c);
Chris@42 333 T3l = FMA(KP707106781, T2w, T2t);
Chris@42 334 T2x = FNMS(KP707106781, T2w, T2t);
Chris@42 335 T55 = FMA(KP707106781, T4f, T4e);
Chris@42 336 T4g = FNMS(KP707106781, T4f, T4e);
Chris@42 337 T7S = FMA(KP414213562, T7C, T7F);
Chris@42 338 T7G = FNMS(KP414213562, T7F, T7C);
Chris@42 339 }
Chris@42 340 }
Chris@42 341 }
Chris@42 342 {
Chris@42 343 E T43, T1y, T7o, Tm, T7p, T44, T1D, Tq, T1o, Tp, T5L, T1m, Tr, T1p, T1q;
Chris@42 344 {
Chris@42 345 E Tj, T1z, Ti, T5O, T1x, Tk, T1A, T1B;
Chris@42 346 {
Chris@42 347 E Tg, Th, T1v, T1w;
Chris@42 348 Tg = Rp[WS(rs, 2)];
Chris@42 349 T56 = FMA(KP707106781, T4c, T4b);
Chris@42 350 T4d = FNMS(KP707106781, T4c, T4b);
Chris@42 351 T6o = T5T + T60;
Chris@42 352 T61 = T5T - T60;
Chris@42 353 Th = Rm[WS(rs, 13)];
Chris@42 354 T1v = Ip[WS(rs, 2)];
Chris@42 355 T1w = Im[WS(rs, 13)];
Chris@42 356 Tj = Rp[WS(rs, 10)];
Chris@42 357 T1z = Tg - Th;
Chris@42 358 Ti = Tg + Th;
Chris@42 359 T5O = T1v - T1w;
Chris@42 360 T1x = T1v + T1w;
Chris@42 361 Tk = Rm[WS(rs, 5)];
Chris@42 362 T1A = Ip[WS(rs, 10)];
Chris@42 363 T1B = Im[WS(rs, 5)];
Chris@42 364 }
Chris@42 365 {
Chris@42 366 E Tn, To, T1k, T1l;
Chris@42 367 Tn = Rm[WS(rs, 1)];
Chris@42 368 {
Chris@42 369 E T1u, Tl, T5P, T1C;
Chris@42 370 T1u = Tj - Tk;
Chris@42 371 Tl = Tj + Tk;
Chris@42 372 T5P = T1A - T1B;
Chris@42 373 T1C = T1A + T1B;
Chris@42 374 T43 = T1x - T1u;
Chris@42 375 T1y = T1u + T1x;
Chris@42 376 T7o = Ti - Tl;
Chris@42 377 Tm = Ti + Tl;
Chris@42 378 T5Q = T5O + T5P;
Chris@42 379 T7p = T5O - T5P;
Chris@42 380 T44 = T1z + T1C;
Chris@42 381 T1D = T1z - T1C;
Chris@42 382 To = Rp[WS(rs, 14)];
Chris@42 383 }
Chris@42 384 T1k = Ip[WS(rs, 14)];
Chris@42 385 T1l = Im[WS(rs, 1)];
Chris@42 386 Tq = Rp[WS(rs, 6)];
Chris@42 387 T1o = Tn - To;
Chris@42 388 Tp = Tn + To;
Chris@42 389 T5L = T1k - T1l;
Chris@42 390 T1m = T1k + T1l;
Chris@42 391 Tr = Rm[WS(rs, 9)];
Chris@42 392 T1p = Ip[WS(rs, 6)];
Chris@42 393 T1q = Im[WS(rs, 9)];
Chris@42 394 }
Chris@42 395 }
Chris@42 396 {
Chris@42 397 E T46, T47, T7P, T7O, T2N, T1t, T1E, T2M, T4w, T4x;
Chris@42 398 {
Chris@42 399 E T1n, Tt, T1s, T7n, T7q, T7m, T7l;
Chris@42 400 {
Chris@42 401 E T1j, Ts, T5M, T1r;
Chris@42 402 T1j = Tq - Tr;
Chris@42 403 Ts = Tq + Tr;
Chris@42 404 T5M = T1p - T1q;
Chris@42 405 T1r = T1p + T1q;
Chris@42 406 T46 = T1j + T1m;
Chris@42 407 T1n = T1j - T1m;
Chris@42 408 T7m = Tp - Ts;
Chris@42 409 Tt = Tp + Ts;
Chris@42 410 T5N = T5L + T5M;
Chris@42 411 T7l = T5L - T5M;
Chris@42 412 T47 = T1o + T1r;
Chris@42 413 T1s = T1o - T1r;
Chris@42 414 }
Chris@42 415 T7P = T7m + T7l;
Chris@42 416 T7n = T7l - T7m;
Chris@42 417 T7q = T7o + T7p;
Chris@42 418 T7O = T7o - T7p;
Chris@42 419 T6f = Tm - Tt;
Chris@42 420 Tu = Tm + Tt;
Chris@42 421 T8y = T7q + T7n;
Chris@42 422 T7r = T7n - T7q;
Chris@42 423 T2N = FMA(KP414213562, T1n, T1s);
Chris@42 424 T1t = FNMS(KP414213562, T1s, T1n);
Chris@42 425 T1E = FMA(KP414213562, T1D, T1y);
Chris@42 426 T2M = FNMS(KP414213562, T1y, T1D);
Chris@42 427 }
Chris@42 428 T8l = T7O + T7P;
Chris@42 429 T7Q = T7O - T7P;
Chris@42 430 T3w = T1E + T1t;
Chris@42 431 T1F = T1t - T1E;
Chris@42 432 T45 = FNMS(KP414213562, T44, T43);
Chris@42 433 T4w = FMA(KP414213562, T43, T44);
Chris@42 434 T4x = FMA(KP414213562, T46, T47);
Chris@42 435 T48 = FNMS(KP414213562, T47, T46);
Chris@42 436 T3j = T2M + T2N;
Chris@42 437 T2O = T2M - T2N;
Chris@42 438 T53 = T4w + T4x;
Chris@42 439 T4y = T4w - T4x;
Chris@42 440 }
Chris@42 441 }
Chris@42 442 }
Chris@42 443 {
Chris@42 444 E T72, T5g, T49, T78, T77, T73, T7s, T7U, T7R, T7H, T3f, T3e, T3d;
Chris@42 445 {
Chris@42 446 E T5R, T8m, T8C, T8z, T8t, T8e, T86, T88, T8h, T8f, T8i, T8c, T8g;
Chris@42 447 {
Chris@42 448 E T6P, T6Q, T6Z, T6S, T6R;
Chris@42 449 {
Chris@42 450 E Tv, T10, T6V, T6Y, T6U;
Chris@42 451 T72 = Tf - Tu;
Chris@42 452 Tv = Tf + Tu;
Chris@42 453 T6U = T5Q + T5N;
Chris@42 454 T5R = T5N - T5Q;
Chris@42 455 T5g = T48 - T45;
Chris@42 456 T49 = T45 + T48;
Chris@42 457 T10 = TK + TZ;
Chris@42 458 T78 = TK - TZ;
Chris@42 459 T77 = T6T - T6U;
Chris@42 460 T6V = T6T + T6U;
Chris@42 461 T6Y = T6W + T6X;
Chris@42 462 T73 = T6X - T6W;
Chris@42 463 T6P = W[30];
/* Rp[0] and Rm[0] are the only outputs stored without a multiplication by a W[] value. */
Chris@42 464 Rp[0] = Tv + T10;
Chris@42 465 T6Q = Tv - T10;
Chris@42 466 Rm[0] = T6V + T6Y;
Chris@42 467 T6Z = T6V - T6Y;
Chris@42 468 T6S = W[31];
Chris@42 469 T6R = T6P * T6Q;
Chris@42 470 }
Chris@42 471 {
Chris@42 472 E T8O, T8W, T8Q, T8Z, T8X, T90, T8U, T8Y;
Chris@42 473 {
Chris@42 474 E T8R, T8S, T8M, T8N, T70;
Chris@42 475 T8M = FMA(KP707106781, T8l, T8k);
Chris@42 476 T8m = FNMS(KP707106781, T8l, T8k);
Chris@42 477 T8C = T8A - T8B;
Chris@42 478 T8N = T8A + T8B;
Chris@42 479 T70 = T6S * T6Q;
Chris@42 480 Rp[WS(rs, 8)] = FNMS(T6S, T6Z, T6R);
Chris@42 481 T8R = FMA(KP707106781, T8y, T8x);
Chris@42 482 T8z = FNMS(KP707106781, T8y, T8x);
Chris@42 483 T8O = FNMS(KP923879532, T8N, T8M);
Chris@42 484 T8W = FMA(KP923879532, T8N, T8M);
Chris@42 485 Rm[WS(rs, 8)] = FMA(T6P, T6Z, T70);
Chris@42 486 T8S = T8s + T8p;
Chris@42 487 T8t = T8p - T8s;
Chris@42 488 {
Chris@42 489 E T8L, T8T, T8P, T8V;
Chris@42 490 T8L = W[34];
Chris@42 491 T8Q = W[35];
Chris@42 492 T8V = W[2];
Chris@42 493 T8Z = FMA(KP923879532, T8S, T8R);
Chris@42 494 T8T = FNMS(KP923879532, T8S, T8R);
Chris@42 495 T8P = T8L * T8O;
Chris@42 496 T8X = T8V * T8W;
Chris@42 497 T90 = T8V * T8Z;
Chris@42 498 T8U = T8L * T8T;
Chris@42 499 Rp[WS(rs, 9)] = FNMS(T8Q, T8T, T8P);
Chris@42 500 T8Y = W[3];
Chris@42 501 }
Chris@42 502 }
Chris@42 503 {
Chris@42 504 E T89, T8a, T84, T85;
Chris@42 505 T84 = FNMS(KP707106781, T7r, T7k);
Chris@42 506 T7s = FMA(KP707106781, T7r, T7k);
Chris@42 507 Rm[WS(rs, 9)] = FMA(T8Q, T8O, T8U);
Chris@42 508 T85 = T7S + T7T;
Chris@42 509 T7U = T7S - T7T;
Chris@42 510 Rm[WS(rs, 1)] = FMA(T8Y, T8W, T90);
Chris@42 511 Rp[WS(rs, 1)] = FNMS(T8Y, T8Z, T8X);
Chris@42 512 T7R = FMA(KP707106781, T7Q, T7N);
Chris@42 513 T89 = FNMS(KP707106781, T7Q, T7N);
Chris@42 514 T8e = FMA(KP923879532, T85, T84);
Chris@42 515 T86 = FNMS(KP923879532, T85, T84);
Chris@42 516 T8a = T7G + T7z;
Chris@42 517 T7H = T7z - T7G;
Chris@42 518 {
Chris@42 519 E T83, T8b, T87, T8d;
Chris@42 520 T83 = W[26];
Chris@42 521 T88 = W[27];
Chris@42 522 T8d = W[58];
Chris@42 523 T8h = FMA(KP923879532, T8a, T89);
Chris@42 524 T8b = FNMS(KP923879532, T8a, T89);
Chris@42 525 T87 = T83 * T86;
Chris@42 526 T8f = T8d * T8e;
Chris@42 527 T8i = T8d * T8h;
Chris@42 528 T8c = T83 * T8b;
Chris@42 529 Rp[WS(rs, 7)] = FNMS(T88, T8b, T87);
Chris@42 530 T8g = W[59];
Chris@42 531 }
Chris@42 532 }
Chris@42 533 }
Chris@42 534 }
Chris@42 535 {
Chris@42 536 E T5S, T6q, T6n, T6K, T6C, T6b, T6E, T6N, T6L, T6O, T6I, T6M;
Chris@42 537 {
Chris@42 538 E T6F, T6G, T6A, T6B;
Chris@42 539 T6A = T5K - T5R;
Chris@42 540 T5S = T5K + T5R;
Chris@42 541 Rm[WS(rs, 7)] = FMA(T88, T86, T8c);
Chris@42 542 T6B = T6p - T6o;
Chris@42 543 T6q = T6o + T6p;
Chris@42 544 Rm[WS(rs, 15)] = FMA(T8g, T8e, T8i);
Chris@42 545 Rp[WS(rs, 15)] = FNMS(T8g, T8h, T8f);
Chris@42 546 T6n = T6f + T6m;
Chris@42 547 T6F = T6m - T6f;
Chris@42 548 T6K = FMA(KP707106781, T6B, T6A);
Chris@42 549 T6C = FNMS(KP707106781, T6B, T6A);
Chris@42 550 T6G = T61 - T6a;
Chris@42 551 T6b = T61 + T6a;
Chris@42 552 {
Chris@42 553 E T6z, T6H, T6D, T6J;
Chris@42 554 T6z = W[54];
Chris@42 555 T6E = W[55];
Chris@42 556 T6J = W[22];
Chris@42 557 T6N = FMA(KP707106781, T6G, T6F);
Chris@42 558 T6H = FNMS(KP707106781, T6G, T6F);
Chris@42 559 T6D = T6z * T6C;
Chris@42 560 T6L = T6J * T6K;
Chris@42 561 T6O = T6J * T6N;
Chris@42 562 T6I = T6z * T6H;
Chris@42 563 Rp[WS(rs, 14)] = FNMS(T6E, T6H, T6D);
Chris@42 564 T6M = W[23];
Chris@42 565 }
Chris@42 566 }
Chris@42 567 {
Chris@42 568 E T8G, T8F, T8J, T8H, T8I, T8u;
Chris@42 569 Rm[WS(rs, 14)] = FMA(T6E, T6C, T6I);
Chris@42 570 Rm[WS(rs, 6)] = FMA(T6M, T6K, T6O);
Chris@42 571 Rp[WS(rs, 6)] = FNMS(T6M, T6N, T6L);
Chris@42 572 T8G = FMA(KP923879532, T8t, T8m);
Chris@42 573 T8u = FNMS(KP923879532, T8t, T8m);
Chris@42 574 {
Chris@42 575 E T8j, T8w, T8D, T8v, T8E;
Chris@42 576 T8j = W[50];
Chris@42 577 T8w = W[51];
Chris@42 578 T8F = W[18];
Chris@42 579 T8J = FMA(KP923879532, T8C, T8z);
Chris@42 580 T8D = FNMS(KP923879532, T8C, T8z);
Chris@42 581 T8v = T8j * T8u;
Chris@42 582 T8E = T8w * T8u;
Chris@42 583 T8H = T8F * T8G;
Chris@42 584 T8I = W[19];
Chris@42 585 Rp[WS(rs, 13)] = FNMS(T8w, T8D, T8v);
Chris@42 586 Rm[WS(rs, 13)] = FMA(T8j, T8D, T8E);
Chris@42 587 }
Chris@42 588 {
Chris@42 589 E T6c, T6u, T6x, T6r, T8K, T5J, T6e;
Chris@42 590 Rp[WS(rs, 5)] = FNMS(T8I, T8J, T8H);
Chris@42 591 T8K = T8I * T8G;
Chris@42 592 Rm[WS(rs, 5)] = FMA(T8F, T8J, T8K);
Chris@42 593 T6c = FNMS(KP707106781, T6b, T5S);
Chris@42 594 T6u = FMA(KP707106781, T6b, T5S);
Chris@42 595 T6x = FMA(KP707106781, T6q, T6n);
Chris@42 596 T6r = FNMS(KP707106781, T6q, T6n);
Chris@42 597 T5J = W[38];
Chris@42 598 T6e = W[39];
Chris@42 599 {
Chris@42 600 E T6t, T6w, T6d, T6s, T6v, T6y;
Chris@42 601 T6t = W[6];
Chris@42 602 T6w = W[7];
Chris@42 603 T6d = T5J * T6c;
Chris@42 604 T6s = T6e * T6c;
Chris@42 605 T6v = T6t * T6u;
Chris@42 606 T6y = T6w * T6u;
Chris@42 607 Rp[WS(rs, 10)] = FNMS(T6e, T6r, T6d);
Chris@42 608 Rm[WS(rs, 10)] = FMA(T5J, T6r, T6s);
Chris@42 609 Rp[WS(rs, 2)] = FNMS(T6w, T6x, T6v);
Chris@42 610 Rm[WS(rs, 2)] = FMA(T6t, T6x, T6y);
Chris@42 611 }
Chris@42 612 }
Chris@42 613 }
Chris@42 614 }
Chris@42 615 }
Chris@42 616 {
Chris@42 617 E T7c, T7f, T7e, T7g, T7d;
Chris@42 618 {
Chris@42 619 E T71, T74, T79, T76, T75, T7b, T7a;
Chris@42 620 T71 = W[46];
Chris@42 621 T7c = T72 + T73;
Chris@42 622 T74 = T72 - T73;
Chris@42 623 T7f = T78 + T77;
Chris@42 624 T79 = T77 - T78;
Chris@42 625 T76 = W[47];
Chris@42 626 T75 = T71 * T74;
Chris@42 627 T7b = W[14];
Chris@42 628 T7a = T71 * T79;
Chris@42 629 T7e = W[15];
Chris@42 630 Rp[WS(rs, 12)] = FNMS(T76, T79, T75);
Chris@42 631 T7g = T7b * T7f;
Chris@42 632 T7d = T7b * T7c;
Chris@42 633 Rm[WS(rs, 12)] = FMA(T76, T74, T7a);
Chris@42 634 }
Chris@42 635 {
Chris@42 636 E T81, T7X, T80, T7Z, T82;
Chris@42 637 Rm[WS(rs, 4)] = FMA(T7e, T7c, T7g);
Chris@42 638 Rp[WS(rs, 4)] = FNMS(T7e, T7f, T7d);
Chris@42 639 {
Chris@42 640 E T7h, T7Y, T7I, T7V, T7K, T7J, T7W;
Chris@42 641 T7h = W[42];
Chris@42 642 T7Y = FMA(KP923879532, T7H, T7s);
Chris@42 643 T7I = FNMS(KP923879532, T7H, T7s);
Chris@42 644 T81 = FMA(KP923879532, T7U, T7R);
Chris@42 645 T7V = FNMS(KP923879532, T7U, T7R);
Chris@42 646 T7K = W[43];
Chris@42 647 T7J = T7h * T7I;
Chris@42 648 T7X = W[10];
Chris@42 649 T80 = W[11];
Chris@42 650 T7W = T7K * T7I;
Chris@42 651 Rp[WS(rs, 11)] = FNMS(T7K, T7V, T7J);
Chris@42 652 T7Z = T7X * T7Y;
Chris@42 653 T82 = T80 * T7Y;
Chris@42 654 Rm[WS(rs, 11)] = FMA(T7h, T7V, T7W);
Chris@42 655 }
Chris@42 656 {
Chris@42 657 E T2P, T37, T1G, T32, T2R, T2Q, T38, T2z, T27, T2y;
Chris@42 658 T2P = FMA(KP923879532, T2O, T2L);
Chris@42 659 T37 = FNMS(KP923879532, T2O, T2L);
Chris@42 660 Rp[WS(rs, 3)] = FNMS(T80, T81, T7Z);
Chris@42 661 Rm[WS(rs, 3)] = FMA(T7X, T81, T82);
Chris@42 662 T1G = FMA(KP923879532, T1F, T1i);
Chris@42 663 T32 = FNMS(KP923879532, T1F, T1i);
Chris@42 664 T2R = FNMS(KP668178637, T1X, T26);
Chris@42 665 T27 = FMA(KP668178637, T26, T1X);
Chris@42 666 T2y = FNMS(KP668178637, T2x, T2o);
Chris@42 667 T2Q = FMA(KP668178637, T2o, T2x);
Chris@42 668 T38 = T2y + T27;
Chris@42 669 T2z = T27 - T2y;
Chris@42 670 {
Chris@42 671 E T2C, T2A, T3c, T34, T2U, T39, T36, T31;
Chris@42 672 {
Chris@42 673 E T11, T2W, T2S, T33;
Chris@42 674 T11 = W[40];
Chris@42 675 T2C = W[41];
Chris@42 676 T2A = FNMS(KP831469612, T2z, T1G);
Chris@42 677 T2W = FMA(KP831469612, T2z, T1G);
Chris@42 678 T2S = T2Q - T2R;
Chris@42 679 T33 = T2Q + T2R;
Chris@42 680 {
Chris@42 681 E T2V, T2B, T2T, T2Z, T2X, T2Y, T30;
Chris@42 682 T2V = W[8];
Chris@42 683 T2B = T11 * T2A;
Chris@42 684 T3c = FMA(KP831469612, T33, T32);
Chris@42 685 T34 = FNMS(KP831469612, T33, T32);
Chris@42 686 T2T = FNMS(KP831469612, T2S, T2P);
Chris@42 687 T2Z = FMA(KP831469612, T2S, T2P);
Chris@42 688 T2X = T2V * T2W;
Chris@42 689 T2Y = W[9];
Chris@42 690 T30 = T2V * T2Z;
Chris@42 691 Ip[WS(rs, 10)] = FNMS(T2C, T2T, T2B);
Chris@42 692 T2U = T11 * T2T;
Chris@42 693 Ip[WS(rs, 2)] = FNMS(T2Y, T2Z, T2X);
Chris@42 694 Im[WS(rs, 2)] = FMA(T2Y, T2W, T30);
Chris@42 695 }
Chris@42 696 }
Chris@42 697 T39 = FNMS(KP831469612, T38, T37);
Chris@42 698 T3f = FMA(KP831469612, T38, T37);
Chris@42 699 Im[WS(rs, 10)] = FMA(T2C, T2A, T2U);
Chris@42 700 T36 = W[25];
Chris@42 701 T31 = W[24];
Chris@42 702 {
Chris@42 703 E T3b, T3g, T3a, T35;
Chris@42 704 T3e = W[57];
Chris@42 705 T3a = T36 * T34;
Chris@42 706 T35 = T31 * T34;
Chris@42 707 T3b = W[56];
Chris@42 708 T3g = T3e * T3c;
Chris@42 709 Im[WS(rs, 6)] = FMA(T31, T39, T3a);
Chris@42 710 Ip[WS(rs, 6)] = FNMS(T36, T39, T35);
Chris@42 711 T3d = T3b * T3c;
Chris@42 712 Im[WS(rs, 14)] = FMA(T3b, T3f, T3g);
Chris@42 713 }
Chris@42 714 }
Chris@42 715 }
Chris@42 716 }
Chris@42 717 }
Chris@42 718 {
Chris@42 719 E T4G, T4J, T4I, T4F, T4K;
Chris@42 720 {
Chris@42 721 E T4z, T4R, T4a, T4M, T4h, T4o, T4C, T4N, T4A, T4B;
Chris@42 722 T4z = FMA(KP923879532, T4y, T4v);
Chris@42 723 T4R = FNMS(KP923879532, T4y, T4v);
Chris@42 724 T4a = FNMS(KP923879532, T49, T42);
Chris@42 725 T4M = FMA(KP923879532, T49, T42);
Chris@42 726 Ip[WS(rs, 14)] = FNMS(T3e, T3f, T3d);
Chris@42 727 T4h = FNMS(KP668178637, T4g, T4d);
Chris@42 728 T4A = FMA(KP668178637, T4d, T4g);
Chris@42 729 T4B = FMA(KP668178637, T4k, T4n);
Chris@42 730 T4o = FNMS(KP668178637, T4n, T4k);
Chris@42 731 T4C = T4A - T4B;
Chris@42 732 T4N = T4A + T4B;
Chris@42 733 {
Chris@42 734 E T4W, T4Z, T4q, T4X, T50, T4Y;
Chris@42 735 {
Chris@42 736 E T4L, T4Q, T4O, T4p, T4S, T4P, T4U, T4V, T4T;
Chris@42 737 T4L = W[20];
Chris@42 738 T4Q = W[21];
Chris@42 739 T4W = FMA(KP831469612, T4N, T4M);
Chris@42 740 T4O = FNMS(KP831469612, T4N, T4M);
Chris@42 741 T4p = T4h + T4o;
Chris@42 742 T4S = T4h - T4o;
Chris@42 743 T4P = T4L * T4O;
Chris@42 744 T4V = W[52];
Chris@42 745 T4Z = FNMS(KP831469612, T4S, T4R);
Chris@42 746 T4T = FMA(KP831469612, T4S, T4R);
Chris@42 747 T4q = FNMS(KP831469612, T4p, T4a);
Chris@42 748 T4G = FMA(KP831469612, T4p, T4a);
Chris@42 749 Ip[WS(rs, 5)] = FNMS(T4Q, T4T, T4P);
Chris@42 750 T4U = T4L * T4T;
Chris@42 751 T4X = T4V * T4W;
Chris@42 752 T50 = T4V * T4Z;
Chris@42 753 T4Y = W[53];
Chris@42 754 Im[WS(rs, 5)] = FMA(T4Q, T4O, T4U);
Chris@42 755 }
Chris@42 756 {
Chris@42 757 E T4D, T4s, T3Z, T4E, T4r;
Chris@42 758 T4J = FMA(KP831469612, T4C, T4z);
Chris@42 759 T4D = FNMS(KP831469612, T4C, T4z);
Chris@42 760 T4s = W[37];
Chris@42 761 Im[WS(rs, 13)] = FMA(T4Y, T4W, T50);
Chris@42 762 Ip[WS(rs, 13)] = FNMS(T4Y, T4Z, T4X);
Chris@42 763 T3Z = W[36];
Chris@42 764 T4E = T4s * T4q;
Chris@42 765 T4I = W[5];
Chris@42 766 T4r = T3Z * T4q;
Chris@42 767 Im[WS(rs, 9)] = FMA(T3Z, T4D, T4E);
Chris@42 768 T4F = W[4];
Chris@42 769 T4K = T4I * T4G;
Chris@42 770 Ip[WS(rs, 9)] = FNMS(T4s, T4D, T4r);
Chris@42 771 }
Chris@42 772 }
Chris@42 773 }
Chris@42 774 {
Chris@42 775 E T3E, T3H, T3G, T3D, T3I;
Chris@42 776 {
Chris@42 777 E T3x, T3P, T3k, T3K, T3n, T3q, T3A, T3L, T4H, T3y, T3z;
Chris@42 778 T3x = FMA(KP923879532, T3w, T3v);
Chris@42 779 T3P = FNMS(KP923879532, T3w, T3v);
Chris@42 780 T4H = T4F * T4G;
Chris@42 781 Im[WS(rs, 1)] = FMA(T4F, T4J, T4K);
Chris@42 782 T3k = FMA(KP923879532, T3j, T3i);
Chris@42 783 T3K = FNMS(KP923879532, T3j, T3i);
Chris@42 784 T3y = FMA(KP198912367, T3l, T3m);
Chris@42 785 T3n = FNMS(KP198912367, T3m, T3l);
Chris@42 786 Ip[WS(rs, 1)] = FNMS(T4I, T4J, T4H);
Chris@42 787 T3z = FNMS(KP198912367, T3o, T3p);
Chris@42 788 T3q = FMA(KP198912367, T3p, T3o);
Chris@42 789 T3A = T3y + T3z;
Chris@42 790 T3L = T3z - T3y;
Chris@42 791 {
Chris@42 792 E T3U, T3X, T3s, T3V, T3Y, T3W;
Chris@42 793 {
Chris@42 794 E T3J, T3O, T3M, T3r, T3Q, T3N, T3S, T3T, T3R;
Chris@42 795 T3J = W[48];
Chris@42 796 T3O = W[49];
Chris@42 797 T3U = FMA(KP980785280, T3L, T3K);
Chris@42 798 T3M = FNMS(KP980785280, T3L, T3K);
Chris@42 799 T3r = T3n + T3q;
Chris@42 800 T3Q = T3n - T3q;
Chris@42 801 T3N = T3J * T3M;
Chris@42 802 T3T = W[16];
Chris@42 803 T3X = FMA(KP980785280, T3Q, T3P);
Chris@42 804 T3R = FNMS(KP980785280, T3Q, T3P);
Chris@42 805 T3s = FNMS(KP980785280, T3r, T3k);
Chris@42 806 T3E = FMA(KP980785280, T3r, T3k);
Chris@42 807 Ip[WS(rs, 12)] = FNMS(T3O, T3R, T3N);
Chris@42 808 T3S = T3J * T3R;
Chris@42 809 T3V = T3T * T3U;
Chris@42 810 T3Y = T3T * T3X;
Chris@42 811 T3W = W[17];
Chris@42 812 Im[WS(rs, 12)] = FMA(T3O, T3M, T3S);
Chris@42 813 }
Chris@42 814 {
Chris@42 815 E T3B, T3u, T3h, T3C, T3t;
Chris@42 816 T3H = FMA(KP980785280, T3A, T3x);
Chris@42 817 T3B = FNMS(KP980785280, T3A, T3x);
Chris@42 818 T3u = W[33];
Chris@42 819 Im[WS(rs, 4)] = FMA(T3W, T3U, T3Y);
Chris@42 820 Ip[WS(rs, 4)] = FNMS(T3W, T3X, T3V);
Chris@42 821 T3h = W[32];
Chris@42 822 T3C = T3u * T3s;
Chris@42 823 T3G = W[1];
Chris@42 824 T3t = T3h * T3s;
Chris@42 825 Im[WS(rs, 8)] = FMA(T3h, T3B, T3C);
Chris@42 826 T3D = W[0];
Chris@42 827 T3I = T3G * T3E;
Chris@42 828 Ip[WS(rs, 8)] = FNMS(T3u, T3B, T3t);
Chris@42 829 }
Chris@42 830 }
Chris@42 831 }
Chris@42 832 {
Chris@42 833 E T5h, T5z, T54, T5u, T57, T5a, T5k, T5v, T3F, T5i, T5j;
Chris@42 834 T5h = FMA(KP923879532, T5g, T5f);
Chris@42 835 T5z = FNMS(KP923879532, T5g, T5f);
Chris@42 836 T3F = T3D * T3E;
Chris@42 837 Im[0] = FMA(T3D, T3H, T3I);
Chris@42 838 T54 = FNMS(KP923879532, T53, T52);
Chris@42 839 T5u = FMA(KP923879532, T53, T52);
Chris@42 840 T5i = FMA(KP198912367, T55, T56);
Chris@42 841 T57 = FNMS(KP198912367, T56, T55);
Chris@42 842 Ip[0] = FNMS(T3G, T3H, T3F);
Chris@42 843 T5j = FMA(KP198912367, T58, T59);
Chris@42 844 T5a = FNMS(KP198912367, T59, T58);
Chris@42 845 T5k = T5i - T5j;
Chris@42 846 T5v = T5i + T5j;
Chris@42 847 {
Chris@42 848 E T5E, T5H, T5c, T5F, T5I, T5G;
Chris@42 849 {
Chris@42 850 E T5t, T5y, T5w, T5b, T5A, T5x, T5C, T5D, T5B;
Chris@42 851 T5t = W[28];
Chris@42 852 T5y = W[29];
Chris@42 853 T5E = FMA(KP980785280, T5v, T5u);
Chris@42 854 T5w = FNMS(KP980785280, T5v, T5u);
Chris@42 855 T5b = T57 + T5a;
Chris@42 856 T5A = T5a - T57;
Chris@42 857 T5x = T5t * T5w;
Chris@42 858 T5D = W[60];
Chris@42 859 T5H = FNMS(KP980785280, T5A, T5z);
Chris@42 860 T5B = FMA(KP980785280, T5A, T5z);
Chris@42 861 T5c = FMA(KP980785280, T5b, T54);
Chris@42 862 T5o = FNMS(KP980785280, T5b, T54);
Chris@42 863 Ip[WS(rs, 7)] = FNMS(T5y, T5B, T5x);
Chris@42 864 T5C = T5t * T5B;
Chris@42 865 T5F = T5D * T5E;
Chris@42 866 T5I = T5D * T5H;
Chris@42 867 T5G = W[61];
Chris@42 868 Im[WS(rs, 7)] = FMA(T5y, T5w, T5C);
Chris@42 869 }
Chris@42 870 {
Chris@42 871 E T5l, T5e, T51, T5m, T5d;
Chris@42 872 T5r = FMA(KP980785280, T5k, T5h);
Chris@42 873 T5l = FNMS(KP980785280, T5k, T5h);
Chris@42 874 T5e = W[45];
Chris@42 875 Im[WS(rs, 15)] = FMA(T5G, T5E, T5I);
Chris@42 876 Ip[WS(rs, 15)] = FNMS(T5G, T5H, T5F);
Chris@42 877 T51 = W[44];
Chris@42 878 T5m = T5e * T5c;
Chris@42 879 T5q = W[13];
Chris@42 880 T5d = T51 * T5c;
Chris@42 881 Im[WS(rs, 11)] = FMA(T51, T5l, T5m);
Chris@42 882 T5n = W[12];
Chris@42 883 T5s = T5q * T5o;
Chris@42 884 Ip[WS(rs, 11)] = FNMS(T5e, T5l, T5d);
Chris@42 885 }
Chris@42 886 }
Chris@42 887 }
Chris@42 888 }
Chris@42 889 }
Chris@42 890 }
Chris@42 891 }
/* Last pair of stores for this iteration, using the loop-scope temporaries filled in above. */
Chris@42 892 T5p = T5n * T5o;
Chris@42 893 Im[WS(rs, 3)] = FMA(T5n, T5r, T5s);
Chris@42 894 Ip[WS(rs, 3)] = FNMS(T5q, T5r, T5p);
Chris@42 895 }
Chris@42 896 }
Chris@42 897 }
Chris@42 898
/*
 * Twiddle-instruction table for the hc2cb_32 codelet: one TW_FULL entry with
 * arguments (1, 32), followed by a TW_NEXT entry with arguments (1, 0).
 * NOTE(review): presumably TW_FULL with 32 requests the 62 twiddle values
 * (W[0..61]) that each loop iteration of hc2cb_32 consumes, and TW_NEXT ends
 * the list -- confirm against the tw_instr definition.
 */
Chris@42 899 static const tw_instr twinstr[] = {
Chris@42 900 {TW_FULL, 1, 32},
Chris@42 901 {TW_NEXT, 1, 0}
Chris@42 902 };
Chris@42 903
/*
 * Descriptor handed to the planner for this codelet: the value 32, the name
 * "hc2cb_32", the twinstr table, the address of GENUS, and the counts
 * {236, 62, 198, 0}. The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in the generator comment for
 * the HAVE_FMA variant.
 * NOTE(review): field meanings are inferred from the values; confirm against
 * the hc2c_desc declaration.
 */
Chris@42 904 static const hc2c_desc desc = { 32, "hc2cb_32", twinstr, &GENUS, {236, 62, 198, 0} };
Chris@42 905
/*
 * Registration entry point: passes the hc2cb_32 kernel, its descriptor and the
 * HC2C_VIA_RDFT flag to X(khc2c_register) for planner p.
 */
Chris@42 906 void X(codelet_hc2cb_32) (planner *p) {
Chris@42 907 X(khc2c_register) (p, hc2cb_32, &desc, HC2C_VIA_RDFT);
Chris@42 908 }
Chris@42 909 #else /* HAVE_FMA */
Chris@42 910
Chris@42 911 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include hc2cb.h */
Chris@42 912
Chris@42 913 /*
Chris@42 914 * This function contains 434 FP additions, 208 FP multiplications,
Chris@42 915 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@42 916 * 98 stack variables, 7 constants, and 128 memory accesses
Chris@42 917 */
Chris@42 918 #include "hc2cb.h"
Chris@42 919
/*
 * hc2cb_32 (build without HAVE_FMA): machine-generated size-32 codelet,
 * produced by gen_hc2c with "-sign 1 -n 32 -dif" (see the generator comment
 * above).  It works in place on four arrays.
 *
 *   Rp, Ip : read and written at offsets WS(rs, 0..15); advanced by +ms
 *            after every iteration.
 *   Rm, Im : read and written at offsets WS(rs, 0..15); advanced by -ms
 *            after every iteration.
 *   W      : twiddle table; 62 values (W[0]..W[61]) are consumed per
 *            iteration, and the pointer is pre-offset by (mb - 1) * 62.
 *   rs     : stride handed to WS() for element addressing.
 *   mb, me : the loop runs for m = mb .. me - 1.
 *   ms     : per-iteration step applied to the four data pointers.
 *
 * Each iteration loads all 64 inputs, combines them through a fixed network
 * of additions, subtractions and constant multiplies, and stores 64 outputs.
 * Rp[0] and Rm[0] are stored directly; every other output pair is first
 * combined with a pair of consecutive W entries.
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are macros defined elsewhere;
 * presumably a*b + c and c - a*b respectively, as is usual in FFTW -- confirm.
 * The statement order is the generator's schedule; do not hand-edit.
 */
Chris@42 920 static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 921 {
/* Trigonometric constants, numerically: sin(3pi/16), cos(3pi/16), cos(pi/16),
 * sin(pi/16), cos(pi/8), sin(pi/8) and cos(pi/4). */
Chris@42 922 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 923 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 924 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 925 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 926 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 927 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 928 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 929 {
Chris@42 930 INT m;
/* One pass per m: Rp/Ip walk forward by ms, Rm/Im walk backward by ms, and W
 * advances by 62 twiddle values. */
Chris@42 931 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* Intermediates that survive from the load stage into the store stage. */
Chris@42 932 E T4o, T6y, T70, T5u, Tf, T12, T5x, T6z, T3m, T3Y, T29, T2y, T4v, T71, T2U;
Chris@42 933 E T3M, Tu, T1U, T6D, T73, T6G, T74, T1h, T2z, T2X, T3o, T4D, T5A, T4K, T5z;
Chris@42 934 E T30, T3n, TK, T1j, T6S, T7w, T6V, T7v, T1y, T2B, T3c, T3S, T4X, T61, T54;
Chris@42 935 E T62, T3f, T3T, TZ, T1A, T6L, T7z, T6O, T7y, T1P, T2C, T35, T3P, T5g, T64;
Chris@42 936 E T5n, T65, T38, T3Q;
/* Load group 1: Rp/Ip offsets 0, 8, 4, 12 paired with Rm/Im offsets
 * 15, 7, 11, 3; forms sums and differences of each pair. */
Chris@42 937 {
Chris@42 938 E T3, T4m, T1X, T5t, T6, T5s, T20, T4n, Ta, T4p, T24, T4q, Td, T4s, T27;
Chris@42 939 E T4t;
Chris@42 940 {
Chris@42 941 E T1, T2, T1V, T1W;
Chris@42 942 T1 = Rp[0];
Chris@42 943 T2 = Rm[WS(rs, 15)];
Chris@42 944 T3 = T1 + T2;
Chris@42 945 T4m = T1 - T2;
Chris@42 946 T1V = Ip[0];
Chris@42 947 T1W = Im[WS(rs, 15)];
Chris@42 948 T1X = T1V - T1W;
Chris@42 949 T5t = T1V + T1W;
Chris@42 950 }
Chris@42 951 {
Chris@42 952 E T4, T5, T1Y, T1Z;
Chris@42 953 T4 = Rp[WS(rs, 8)];
Chris@42 954 T5 = Rm[WS(rs, 7)];
Chris@42 955 T6 = T4 + T5;
Chris@42 956 T5s = T4 - T5;
Chris@42 957 T1Y = Ip[WS(rs, 8)];
Chris@42 958 T1Z = Im[WS(rs, 7)];
Chris@42 959 T20 = T1Y - T1Z;
Chris@42 960 T4n = T1Y + T1Z;
Chris@42 961 }
Chris@42 962 {
Chris@42 963 E T8, T9, T22, T23;
Chris@42 964 T8 = Rp[WS(rs, 4)];
Chris@42 965 T9 = Rm[WS(rs, 11)];
Chris@42 966 Ta = T8 + T9;
Chris@42 967 T4p = T8 - T9;
Chris@42 968 T22 = Ip[WS(rs, 4)];
Chris@42 969 T23 = Im[WS(rs, 11)];
Chris@42 970 T24 = T22 - T23;
Chris@42 971 T4q = T22 + T23;
Chris@42 972 }
Chris@42 973 {
Chris@42 974 E Tb, Tc, T25, T26;
Chris@42 975 Tb = Rm[WS(rs, 3)];
Chris@42 976 Tc = Rp[WS(rs, 12)];
Chris@42 977 Td = Tb + Tc;
Chris@42 978 T4s = Tb - Tc;
Chris@42 979 T25 = Ip[WS(rs, 12)];
Chris@42 980 T26 = Im[WS(rs, 3)];
Chris@42 981 T27 = T25 - T26;
Chris@42 982 T4t = T25 + T26;
Chris@42 983 }
Chris@42 984 {
Chris@42 985 E T7, Te, T21, T28;
Chris@42 986 T4o = T4m - T4n;
Chris@42 987 T6y = T4m + T4n;
Chris@42 988 T70 = T5t - T5s;
Chris@42 989 T5u = T5s + T5t;
Chris@42 990 T7 = T3 + T6;
Chris@42 991 Te = Ta + Td;
Chris@42 992 Tf = T7 + Te;
Chris@42 993 T12 = T7 - Te;
Chris@42 994 {
Chris@42 995 E T5v, T5w, T3k, T3l;
Chris@42 996 T5v = T4p + T4q;
Chris@42 997 T5w = T4s + T4t;
Chris@42 998 T5x = KP707106781 * (T5v - T5w);
Chris@42 999 T6z = KP707106781 * (T5v + T5w);
Chris@42 1000 T3k = T1X - T20;
Chris@42 1001 T3l = Ta - Td;
Chris@42 1002 T3m = T3k - T3l;
Chris@42 1003 T3Y = T3l + T3k;
Chris@42 1004 }
Chris@42 1005 T21 = T1X + T20;
Chris@42 1006 T28 = T24 + T27;
Chris@42 1007 T29 = T21 - T28;
Chris@42 1008 T2y = T21 + T28;
Chris@42 1009 {
Chris@42 1010 E T4r, T4u, T2S, T2T;
Chris@42 1011 T4r = T4p - T4q;
Chris@42 1012 T4u = T4s - T4t;
Chris@42 1013 T4v = KP707106781 * (T4r + T4u);
Chris@42 1014 T71 = KP707106781 * (T4r - T4u);
Chris@42 1015 T2S = T3 - T6;
Chris@42 1016 T2T = T27 - T24;
Chris@42 1017 T2U = T2S - T2T;
Chris@42 1018 T3M = T2S + T2T;
Chris@42 1019 }
Chris@42 1020 }
Chris@42 1021 }
/* Load group 2: Rp/Ip offsets 2, 10, 14, 6 paired with Rm/Im offsets
 * 13, 5, 1, 9; includes rotations by the pi/8 constants. */
Chris@42 1022 {
Chris@42 1023 E Ti, T4H, T1c, T4F, Tl, T4E, T1f, T4I, Tp, T4A, T15, T4y, Ts, T4x, T18;
Chris@42 1024 E T4B;
Chris@42 1025 {
Chris@42 1026 E Tg, Th, T1a, T1b;
Chris@42 1027 Tg = Rp[WS(rs, 2)];
Chris@42 1028 Th = Rm[WS(rs, 13)];
Chris@42 1029 Ti = Tg + Th;
Chris@42 1030 T4H = Tg - Th;
Chris@42 1031 T1a = Ip[WS(rs, 2)];
Chris@42 1032 T1b = Im[WS(rs, 13)];
Chris@42 1033 T1c = T1a - T1b;
Chris@42 1034 T4F = T1a + T1b;
Chris@42 1035 }
Chris@42 1036 {
Chris@42 1037 E Tj, Tk, T1d, T1e;
Chris@42 1038 Tj = Rp[WS(rs, 10)];
Chris@42 1039 Tk = Rm[WS(rs, 5)];
Chris@42 1040 Tl = Tj + Tk;
Chris@42 1041 T4E = Tj - Tk;
Chris@42 1042 T1d = Ip[WS(rs, 10)];
Chris@42 1043 T1e = Im[WS(rs, 5)];
Chris@42 1044 T1f = T1d - T1e;
Chris@42 1045 T4I = T1d + T1e;
Chris@42 1046 }
Chris@42 1047 {
Chris@42 1048 E Tn, To, T13, T14;
Chris@42 1049 Tn = Rm[WS(rs, 1)];
Chris@42 1050 To = Rp[WS(rs, 14)];
Chris@42 1051 Tp = Tn + To;
Chris@42 1052 T4A = Tn - To;
Chris@42 1053 T13 = Ip[WS(rs, 14)];
Chris@42 1054 T14 = Im[WS(rs, 1)];
Chris@42 1055 T15 = T13 - T14;
Chris@42 1056 T4y = T13 + T14;
Chris@42 1057 }
Chris@42 1058 {
Chris@42 1059 E Tq, Tr, T16, T17;
Chris@42 1060 Tq = Rp[WS(rs, 6)];
Chris@42 1061 Tr = Rm[WS(rs, 9)];
Chris@42 1062 Ts = Tq + Tr;
Chris@42 1063 T4x = Tq - Tr;
Chris@42 1064 T16 = Ip[WS(rs, 6)];
Chris@42 1065 T17 = Im[WS(rs, 9)];
Chris@42 1066 T18 = T16 - T17;
Chris@42 1067 T4B = T16 + T17;
Chris@42 1068 }
Chris@42 1069 {
Chris@42 1070 E Tm, Tt, T6B, T6C;
Chris@42 1071 Tm = Ti + Tl;
Chris@42 1072 Tt = Tp + Ts;
Chris@42 1073 Tu = Tm + Tt;
Chris@42 1074 T1U = Tm - Tt;
Chris@42 1075 T6B = T4H + T4I;
Chris@42 1076 T6C = T4F - T4E;
Chris@42 1077 T6D = FNMS(KP923879532, T6C, KP382683432 * T6B);
Chris@42 1078 T73 = FMA(KP382683432, T6C, KP923879532 * T6B);
Chris@42 1079 }
Chris@42 1080 {
Chris@42 1081 E T6E, T6F, T19, T1g;
Chris@42 1082 T6E = T4A + T4B;
Chris@42 1083 T6F = T4x + T4y;
Chris@42 1084 T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);
Chris@42 1085 T74 = FMA(KP382683432, T6F, KP923879532 * T6E);
Chris@42 1086 T19 = T15 + T18;
Chris@42 1087 T1g = T1c + T1f;
Chris@42 1088 T1h = T19 - T1g;
Chris@42 1089 T2z = T1g + T19;
Chris@42 1090 }
Chris@42 1091 {
Chris@42 1092 E T2V, T2W, T4z, T4C;
Chris@42 1093 T2V = T15 - T18;
Chris@42 1094 T2W = Tp - Ts;
Chris@42 1095 T2X = T2V - T2W;
Chris@42 1096 T3o = T2W + T2V;
Chris@42 1097 T4z = T4x - T4y;
Chris@42 1098 T4C = T4A - T4B;
Chris@42 1099 T4D = FNMS(KP382683432, T4C, KP923879532 * T4z);
Chris@42 1100 T5A = FMA(KP382683432, T4z, KP923879532 * T4C);
Chris@42 1101 }
Chris@42 1102 {
Chris@42 1103 E T4G, T4J, T2Y, T2Z;
Chris@42 1104 T4G = T4E + T4F;
Chris@42 1105 T4J = T4H - T4I;
Chris@42 1106 T4K = FMA(KP923879532, T4G, KP382683432 * T4J);
Chris@42 1107 T5z = FNMS(KP382683432, T4G, KP923879532 * T4J);
Chris@42 1108 T2Y = Ti - Tl;
Chris@42 1109 T2Z = T1c - T1f;
Chris@42 1110 T30 = T2Y + T2Z;
Chris@42 1111 T3n = T2Y - T2Z;
Chris@42 1112 }
Chris@42 1113 }
/* Load group 3: Rp/Ip offsets 1, 9, 13, 5 paired with Rm/Im offsets
 * 14, 6, 2, 10. */
Chris@42 1114 {
Chris@42 1115 E Ty, T4N, T1m, T4Z, TB, T4Y, T1p, T4O, TI, T52, T1w, T4V, TF, T51, T1t;
Chris@42 1116 E T4S;
Chris@42 1117 {
Chris@42 1118 E Tw, Tx, T1n, T1o;
Chris@42 1119 Tw = Rp[WS(rs, 1)];
Chris@42 1120 Tx = Rm[WS(rs, 14)];
Chris@42 1121 Ty = Tw + Tx;
Chris@42 1122 T4N = Tw - Tx;
Chris@42 1123 {
Chris@42 1124 E T1k, T1l, Tz, TA;
Chris@42 1125 T1k = Ip[WS(rs, 1)];
Chris@42 1126 T1l = Im[WS(rs, 14)];
Chris@42 1127 T1m = T1k - T1l;
Chris@42 1128 T4Z = T1k + T1l;
Chris@42 1129 Tz = Rp[WS(rs, 9)];
Chris@42 1130 TA = Rm[WS(rs, 6)];
Chris@42 1131 TB = Tz + TA;
Chris@42 1132 T4Y = Tz - TA;
Chris@42 1133 }
Chris@42 1134 T1n = Ip[WS(rs, 9)];
Chris@42 1135 T1o = Im[WS(rs, 6)];
Chris@42 1136 T1p = T1n - T1o;
Chris@42 1137 T4O = T1n + T1o;
Chris@42 1138 {
Chris@42 1139 E TG, TH, T4T, T1u, T1v, T4U;
Chris@42 1140 TG = Rm[WS(rs, 2)];
Chris@42 1141 TH = Rp[WS(rs, 13)];
Chris@42 1142 T4T = TG - TH;
Chris@42 1143 T1u = Ip[WS(rs, 13)];
Chris@42 1144 T1v = Im[WS(rs, 2)];
Chris@42 1145 T4U = T1u + T1v;
Chris@42 1146 TI = TG + TH;
Chris@42 1147 T52 = T4T + T4U;
Chris@42 1148 T1w = T1u - T1v;
Chris@42 1149 T4V = T4T - T4U;
Chris@42 1150 }
Chris@42 1151 {
Chris@42 1152 E TD, TE, T4Q, T1r, T1s, T4R;
Chris@42 1153 TD = Rp[WS(rs, 5)];
Chris@42 1154 TE = Rm[WS(rs, 10)];
Chris@42 1155 T4Q = TD - TE;
Chris@42 1156 T1r = Ip[WS(rs, 5)];
Chris@42 1157 T1s = Im[WS(rs, 10)];
Chris@42 1158 T4R = T1r + T1s;
Chris@42 1159 TF = TD + TE;
Chris@42 1160 T51 = T4Q + T4R;
Chris@42 1161 T1t = T1r - T1s;
Chris@42 1162 T4S = T4Q - T4R;
Chris@42 1163 }
Chris@42 1164 }
Chris@42 1165 {
Chris@42 1166 E TC, TJ, T6Q, T6R;
Chris@42 1167 TC = Ty + TB;
Chris@42 1168 TJ = TF + TI;
Chris@42 1169 TK = TC + TJ;
Chris@42 1170 T1j = TC - TJ;
Chris@42 1171 T6Q = T4Z - T4Y;
Chris@42 1172 T6R = KP707106781 * (T4S - T4V);
Chris@42 1173 T6S = T6Q + T6R;
Chris@42 1174 T7w = T6Q - T6R;
Chris@42 1175 }
Chris@42 1176 {
Chris@42 1177 E T6T, T6U, T1q, T1x;
Chris@42 1178 T6T = T4N + T4O;
Chris@42 1179 T6U = KP707106781 * (T51 + T52);
Chris@42 1180 T6V = T6T - T6U;
Chris@42 1181 T7v = T6T + T6U;
Chris@42 1182 T1q = T1m + T1p;
Chris@42 1183 T1x = T1t + T1w;
Chris@42 1184 T1y = T1q - T1x;
Chris@42 1185 T2B = T1q + T1x;
Chris@42 1186 }
Chris@42 1187 {
Chris@42 1188 E T3a, T3b, T4P, T4W;
Chris@42 1189 T3a = T1m - T1p;
Chris@42 1190 T3b = TF - TI;
Chris@42 1191 T3c = T3a - T3b;
Chris@42 1192 T3S = T3b + T3a;
Chris@42 1193 T4P = T4N - T4O;
Chris@42 1194 T4W = KP707106781 * (T4S + T4V);
Chris@42 1195 T4X = T4P - T4W;
Chris@42 1196 T61 = T4P + T4W;
Chris@42 1197 }
Chris@42 1198 {
Chris@42 1199 E T50, T53, T3d, T3e;
Chris@42 1200 T50 = T4Y + T4Z;
Chris@42 1201 T53 = KP707106781 * (T51 - T52);
Chris@42 1202 T54 = T50 - T53;
Chris@42 1203 T62 = T50 + T53;
Chris@42 1204 T3d = Ty - TB;
Chris@42 1205 T3e = T1w - T1t;
Chris@42 1206 T3f = T3d - T3e;
Chris@42 1207 T3T = T3d + T3e;
Chris@42 1208 }
Chris@42 1209 }
/* Load group 4: Rp/Ip offsets 15, 7, 11, 3 paired with Rm/Im offsets
 * 0, 8, 4, 12. */
Chris@42 1210 {
Chris@42 1211 E TN, T56, T1D, T5i, TQ, T5h, T1G, T57, TX, T5l, T1N, T5e, TU, T5k, T1K;
Chris@42 1212 E T5b;
Chris@42 1213 {
Chris@42 1214 E TL, TM, T1E, T1F;
Chris@42 1215 TL = Rm[0];
Chris@42 1216 TM = Rp[WS(rs, 15)];
Chris@42 1217 TN = TL + TM;
Chris@42 1218 T56 = TL - TM;
Chris@42 1219 {
Chris@42 1220 E T1B, T1C, TO, TP;
Chris@42 1221 T1B = Ip[WS(rs, 15)];
Chris@42 1222 T1C = Im[0];
Chris@42 1223 T1D = T1B - T1C;
Chris@42 1224 T5i = T1B + T1C;
Chris@42 1225 TO = Rp[WS(rs, 7)];
Chris@42 1226 TP = Rm[WS(rs, 8)];
Chris@42 1227 TQ = TO + TP;
Chris@42 1228 T5h = TO - TP;
Chris@42 1229 }
Chris@42 1230 T1E = Ip[WS(rs, 7)];
Chris@42 1231 T1F = Im[WS(rs, 8)];
Chris@42 1232 T1G = T1E - T1F;
Chris@42 1233 T57 = T1E + T1F;
Chris@42 1234 {
Chris@42 1235 E TV, TW, T5c, T1L, T1M, T5d;
Chris@42 1236 TV = Rm[WS(rs, 4)];
Chris@42 1237 TW = Rp[WS(rs, 11)];
Chris@42 1238 T5c = TV - TW;
Chris@42 1239 T1L = Ip[WS(rs, 11)];
Chris@42 1240 T1M = Im[WS(rs, 4)];
Chris@42 1241 T5d = T1L + T1M;
Chris@42 1242 TX = TV + TW;
Chris@42 1243 T5l = T5c + T5d;
Chris@42 1244 T1N = T1L - T1M;
Chris@42 1245 T5e = T5c - T5d;
Chris@42 1246 }
Chris@42 1247 {
Chris@42 1248 E TS, TT, T59, T1I, T1J, T5a;
Chris@42 1249 TS = Rp[WS(rs, 3)];
Chris@42 1250 TT = Rm[WS(rs, 12)];
Chris@42 1251 T59 = TS - TT;
Chris@42 1252 T1I = Ip[WS(rs, 3)];
Chris@42 1253 T1J = Im[WS(rs, 12)];
Chris@42 1254 T5a = T1I + T1J;
Chris@42 1255 TU = TS + TT;
Chris@42 1256 T5k = T59 + T5a;
Chris@42 1257 T1K = T1I - T1J;
Chris@42 1258 T5b = T59 - T5a;
Chris@42 1259 }
Chris@42 1260 }
Chris@42 1261 {
Chris@42 1262 E TR, TY, T6J, T6K;
Chris@42 1263 TR = TN + TQ;
Chris@42 1264 TY = TU + TX;
Chris@42 1265 TZ = TR + TY;
Chris@42 1266 T1A = TR - TY;
Chris@42 1267 T6J = KP707106781 * (T5b - T5e);
Chris@42 1268 T6K = T5h + T5i;
Chris@42 1269 T6L = T6J - T6K;
Chris@42 1270 T7z = T6K + T6J;
Chris@42 1271 }
Chris@42 1272 {
Chris@42 1273 E T6M, T6N, T1H, T1O;
Chris@42 1274 T6M = T56 + T57;
Chris@42 1275 T6N = KP707106781 * (T5k + T5l);
Chris@42 1276 T6O = T6M - T6N;
Chris@42 1277 T7y = T6M + T6N;
Chris@42 1278 T1H = T1D + T1G;
Chris@42 1279 T1O = T1K + T1N;
Chris@42 1280 T1P = T1H - T1O;
Chris@42 1281 T2C = T1H + T1O;
Chris@42 1282 }
Chris@42 1283 {
Chris@42 1284 E T33, T34, T58, T5f;
Chris@42 1285 T33 = T1D - T1G;
Chris@42 1286 T34 = TU - TX;
Chris@42 1287 T35 = T33 - T34;
Chris@42 1288 T3P = T34 + T33;
Chris@42 1289 T58 = T56 - T57;
Chris@42 1290 T5f = KP707106781 * (T5b + T5e);
Chris@42 1291 T5g = T58 - T5f;
Chris@42 1292 T64 = T58 + T5f;
Chris@42 1293 }
Chris@42 1294 {
Chris@42 1295 E T5j, T5m, T36, T37;
Chris@42 1296 T5j = T5h - T5i;
Chris@42 1297 T5m = KP707106781 * (T5k - T5l);
Chris@42 1298 T5n = T5j - T5m;
Chris@42 1299 T65 = T5j + T5m;
Chris@42 1300 T36 = TN - TQ;
Chris@42 1301 T37 = T1N - T1K;
Chris@42 1302 T38 = T36 - T37;
Chris@42 1303 T3Q = T36 + T37;
Chris@42 1304 }
Chris@42 1305 }
/* Store stage.  Rp[0] and Rm[0] are written without a twiddle multiply;
 * Rp/Rm offset 8 uses W[30], W[31]. */
Chris@42 1306 {
Chris@42 1307 E Tv, T10, T2w, T2A, T2D, T2E, T2v, T2x;
Chris@42 1308 Tv = Tf + Tu;
Chris@42 1309 T10 = TK + TZ;
Chris@42 1310 T2w = Tv - T10;
Chris@42 1311 T2A = T2y + T2z;
Chris@42 1312 T2D = T2B + T2C;
Chris@42 1313 T2E = T2A - T2D;
Chris@42 1314 Rp[0] = Tv + T10;
Chris@42 1315 Rm[0] = T2A + T2D;
Chris@42 1316 T2v = W[30];
Chris@42 1317 T2x = W[31];
Chris@42 1318 Rp[WS(rs, 8)] = FNMS(T2x, T2E, T2v * T2w);
Chris@42 1319 Rm[WS(rs, 8)] = FMA(T2x, T2w, T2v * T2E);
Chris@42 1320 }
/* Outputs Rp/Rm at offsets 12 and 4. */
Chris@42 1321 {
Chris@42 1322 E T2I, T2O, T2M, T2Q;
Chris@42 1323 {
Chris@42 1324 E T2G, T2H, T2K, T2L;
Chris@42 1325 T2G = Tf - Tu;
Chris@42 1326 T2H = T2C - T2B;
Chris@42 1327 T2I = T2G - T2H;
Chris@42 1328 T2O = T2G + T2H;
Chris@42 1329 T2K = T2y - T2z;
Chris@42 1330 T2L = TK - TZ;
Chris@42 1331 T2M = T2K - T2L;
Chris@42 1332 T2Q = T2L + T2K;
Chris@42 1333 }
Chris@42 1334 {
Chris@42 1335 E T2F, T2J, T2N, T2P;
Chris@42 1336 T2F = W[46];
Chris@42 1337 T2J = W[47];
Chris@42 1338 Rp[WS(rs, 12)] = FNMS(T2J, T2M, T2F * T2I);
Chris@42 1339 Rm[WS(rs, 12)] = FMA(T2F, T2M, T2J * T2I);
Chris@42 1340 T2N = W[14];
Chris@42 1341 T2P = W[15];
Chris@42 1342 Rp[WS(rs, 4)] = FNMS(T2P, T2Q, T2N * T2O);
Chris@42 1343 Rm[WS(rs, 4)] = FMA(T2N, T2Q, T2P * T2O);
Chris@42 1344 }
Chris@42 1345 }
/* Outputs Rp/Rm at offsets 10, 6, 2 and 14. */
Chris@42 1346 {
Chris@42 1347 E T1i, T2a, T2o, T2k, T2d, T2l, T1R, T2p;
Chris@42 1348 T1i = T12 + T1h;
Chris@42 1349 T2a = T1U + T29;
Chris@42 1350 T2o = T29 - T1U;
Chris@42 1351 T2k = T12 - T1h;
Chris@42 1352 {
Chris@42 1353 E T2b, T2c, T1z, T1Q;
Chris@42 1354 T2b = T1j + T1y;
Chris@42 1355 T2c = T1P - T1A;
Chris@42 1356 T2d = KP707106781 * (T2b + T2c);
Chris@42 1357 T2l = KP707106781 * (T2c - T2b);
Chris@42 1358 T1z = T1j - T1y;
Chris@42 1359 T1Q = T1A + T1P;
Chris@42 1360 T1R = KP707106781 * (T1z + T1Q);
Chris@42 1361 T2p = KP707106781 * (T1z - T1Q);
Chris@42 1362 }
Chris@42 1363 {
Chris@42 1364 E T1S, T2e, T11, T1T;
Chris@42 1365 T1S = T1i - T1R;
Chris@42 1366 T2e = T2a - T2d;
Chris@42 1367 T11 = W[38];
Chris@42 1368 T1T = W[39];
Chris@42 1369 Rp[WS(rs, 10)] = FNMS(T1T, T2e, T11 * T1S);
Chris@42 1370 Rm[WS(rs, 10)] = FMA(T1T, T1S, T11 * T2e);
Chris@42 1371 }
Chris@42 1372 {
Chris@42 1373 E T2s, T2u, T2r, T2t;
Chris@42 1374 T2s = T2k + T2l;
Chris@42 1375 T2u = T2o + T2p;
Chris@42 1376 T2r = W[22];
Chris@42 1377 T2t = W[23];
Chris@42 1378 Rp[WS(rs, 6)] = FNMS(T2t, T2u, T2r * T2s);
Chris@42 1379 Rm[WS(rs, 6)] = FMA(T2r, T2u, T2t * T2s);
Chris@42 1380 }
Chris@42 1381 {
Chris@42 1382 E T2g, T2i, T2f, T2h;
Chris@42 1383 T2g = T1i + T1R;
Chris@42 1384 T2i = T2a + T2d;
Chris@42 1385 T2f = W[6];
Chris@42 1386 T2h = W[7];
Chris@42 1387 Rp[WS(rs, 2)] = FNMS(T2h, T2i, T2f * T2g);
Chris@42 1388 Rm[WS(rs, 2)] = FMA(T2h, T2g, T2f * T2i);
Chris@42 1389 }
Chris@42 1390 {
Chris@42 1391 E T2m, T2q, T2j, T2n;
Chris@42 1392 T2m = T2k - T2l;
Chris@42 1393 T2q = T2o - T2p;
Chris@42 1394 T2j = W[54];
Chris@42 1395 T2n = W[55];
Chris@42 1396 Rp[WS(rs, 14)] = FNMS(T2n, T2q, T2j * T2m);
Chris@42 1397 Rm[WS(rs, 14)] = FMA(T2j, T2q, T2n * T2m);
Chris@42 1398 }
Chris@42 1399 }
/* Outputs Rp/Rm at offsets 13, 1, 5 and 9. */
Chris@42 1400 {
Chris@42 1401 E T3O, T4a, T40, T4e, T3V, T4f, T43, T4b, T3N, T3Z;
Chris@42 1402 T3N = KP707106781 * (T3n + T3o);
Chris@42 1403 T3O = T3M - T3N;
Chris@42 1404 T4a = T3M + T3N;
Chris@42 1405 T3Z = KP707106781 * (T30 + T2X);
Chris@42 1406 T40 = T3Y - T3Z;
Chris@42 1407 T4e = T3Y + T3Z;
Chris@42 1408 {
Chris@42 1409 E T3R, T3U, T41, T42;
Chris@42 1410 T3R = FNMS(KP382683432, T3Q, KP923879532 * T3P);
Chris@42 1411 T3U = FMA(KP923879532, T3S, KP382683432 * T3T);
Chris@42 1412 T3V = T3R - T3U;
Chris@42 1413 T4f = T3U + T3R;
Chris@42 1414 T41 = FNMS(KP382683432, T3S, KP923879532 * T3T);
Chris@42 1415 T42 = FMA(KP382683432, T3P, KP923879532 * T3Q);
Chris@42 1416 T43 = T41 - T42;
Chris@42 1417 T4b = T41 + T42;
Chris@42 1418 }
Chris@42 1419 {
Chris@42 1420 E T3W, T44, T3L, T3X;
Chris@42 1421 T3W = T3O - T3V;
Chris@42 1422 T44 = T40 - T43;
Chris@42 1423 T3L = W[50];
Chris@42 1424 T3X = W[51];
Chris@42 1425 Rp[WS(rs, 13)] = FNMS(T3X, T44, T3L * T3W);
Chris@42 1426 Rm[WS(rs, 13)] = FMA(T3X, T3W, T3L * T44);
Chris@42 1427 }
Chris@42 1428 {
Chris@42 1429 E T4i, T4k, T4h, T4j;
Chris@42 1430 T4i = T4a + T4b;
Chris@42 1431 T4k = T4e + T4f;
Chris@42 1432 T4h = W[2];
Chris@42 1433 T4j = W[3];
Chris@42 1434 Rp[WS(rs, 1)] = FNMS(T4j, T4k, T4h * T4i);
Chris@42 1435 Rm[WS(rs, 1)] = FMA(T4h, T4k, T4j * T4i);
Chris@42 1436 }
Chris@42 1437 {
Chris@42 1438 E T46, T48, T45, T47;
Chris@42 1439 T46 = T3O + T3V;
Chris@42 1440 T48 = T40 + T43;
Chris@42 1441 T45 = W[18];
Chris@42 1442 T47 = W[19];
Chris@42 1443 Rp[WS(rs, 5)] = FNMS(T47, T48, T45 * T46);
Chris@42 1444 Rm[WS(rs, 5)] = FMA(T47, T46, T45 * T48);
Chris@42 1445 }
Chris@42 1446 {
Chris@42 1447 E T4c, T4g, T49, T4d;
Chris@42 1448 T4c = T4a - T4b;
Chris@42 1449 T4g = T4e - T4f;
Chris@42 1450 T49 = W[34];
Chris@42 1451 T4d = W[35];
Chris@42 1452 Rp[WS(rs, 9)] = FNMS(T4d, T4g, T49 * T4c);
Chris@42 1453 Rm[WS(rs, 9)] = FMA(T49, T4g, T4d * T4c);
Chris@42 1454 }
Chris@42 1455 }
/* Outputs Rp/Rm at offsets 15, 3, 7 and 11. */
Chris@42 1456 {
Chris@42 1457 E T32, T3A, T3q, T3E, T3h, T3F, T3t, T3B, T31, T3p;
Chris@42 1458 T31 = KP707106781 * (T2X - T30);
Chris@42 1459 T32 = T2U - T31;
Chris@42 1460 T3A = T2U + T31;
Chris@42 1461 T3p = KP707106781 * (T3n - T3o);
Chris@42 1462 T3q = T3m - T3p;
Chris@42 1463 T3E = T3m + T3p;
Chris@42 1464 {
Chris@42 1465 E T39, T3g, T3r, T3s;
Chris@42 1466 T39 = FNMS(KP923879532, T38, KP382683432 * T35);
Chris@42 1467 T3g = FMA(KP382683432, T3c, KP923879532 * T3f);
Chris@42 1468 T3h = T39 - T3g;
Chris@42 1469 T3F = T3g + T39;
Chris@42 1470 T3r = FNMS(KP923879532, T3c, KP382683432 * T3f);
Chris@42 1471 T3s = FMA(KP923879532, T35, KP382683432 * T38);
Chris@42 1472 T3t = T3r - T3s;
Chris@42 1473 T3B = T3r + T3s;
Chris@42 1474 }
Chris@42 1475 {
Chris@42 1476 E T3i, T3u, T2R, T3j;
Chris@42 1477 T3i = T32 - T3h;
Chris@42 1478 T3u = T3q - T3t;
Chris@42 1479 T2R = W[58];
Chris@42 1480 T3j = W[59];
Chris@42 1481 Rp[WS(rs, 15)] = FNMS(T3j, T3u, T2R * T3i);
Chris@42 1482 Rm[WS(rs, 15)] = FMA(T3j, T3i, T2R * T3u);
Chris@42 1483 }
Chris@42 1484 {
Chris@42 1485 E T3I, T3K, T3H, T3J;
Chris@42 1486 T3I = T3A + T3B;
Chris@42 1487 T3K = T3E + T3F;
Chris@42 1488 T3H = W[10];
Chris@42 1489 T3J = W[11];
Chris@42 1490 Rp[WS(rs, 3)] = FNMS(T3J, T3K, T3H * T3I);
Chris@42 1491 Rm[WS(rs, 3)] = FMA(T3H, T3K, T3J * T3I);
Chris@42 1492 }
Chris@42 1493 {
Chris@42 1494 E T3w, T3y, T3v, T3x;
Chris@42 1495 T3w = T32 + T3h;
Chris@42 1496 T3y = T3q + T3t;
Chris@42 1497 T3v = W[26];
Chris@42 1498 T3x = W[27];
Chris@42 1499 Rp[WS(rs, 7)] = FNMS(T3x, T3y, T3v * T3w);
Chris@42 1500 Rm[WS(rs, 7)] = FMA(T3x, T3w, T3v * T3y);
Chris@42 1501 }
Chris@42 1502 {
Chris@42 1503 E T3C, T3G, T3z, T3D;
Chris@42 1504 T3C = T3A - T3B;
Chris@42 1505 T3G = T3E - T3F;
Chris@42 1506 T3z = W[42];
Chris@42 1507 T3D = W[43];
Chris@42 1508 Rp[WS(rs, 11)] = FNMS(T3D, T3G, T3z * T3C);
Chris@42 1509 Rm[WS(rs, 11)] = FMA(T3z, T3G, T3D * T3C);
Chris@42 1510 }
Chris@42 1511 }
/* Outputs Ip/Im at offsets 8, 4, 0 and 12 (uses the pi/16 constants). */
Chris@42 1512 {
Chris@42 1513 E T60, T6m, T6f, T6n, T67, T6r, T6c, T6q;
Chris@42 1514 {
Chris@42 1515 E T5Y, T5Z, T6d, T6e;
Chris@42 1516 T5Y = T4o + T4v;
Chris@42 1517 T5Z = T5z + T5A;
Chris@42 1518 T60 = T5Y + T5Z;
Chris@42 1519 T6m = T5Y - T5Z;
Chris@42 1520 T6d = FMA(KP195090322, T61, KP980785280 * T62);
Chris@42 1521 T6e = FNMS(KP195090322, T64, KP980785280 * T65);
Chris@42 1522 T6f = T6d + T6e;
Chris@42 1523 T6n = T6e - T6d;
Chris@42 1524 }
Chris@42 1525 {
Chris@42 1526 E T63, T66, T6a, T6b;
Chris@42 1527 T63 = FNMS(KP195090322, T62, KP980785280 * T61);
Chris@42 1528 T66 = FMA(KP980785280, T64, KP195090322 * T65);
Chris@42 1529 T67 = T63 + T66;
Chris@42 1530 T6r = T63 - T66;
Chris@42 1531 T6a = T5u + T5x;
Chris@42 1532 T6b = T4K + T4D;
Chris@42 1533 T6c = T6a + T6b;
Chris@42 1534 T6q = T6a - T6b;
Chris@42 1535 }
Chris@42 1536 {
Chris@42 1537 E T68, T6g, T5X, T69;
Chris@42 1538 T68 = T60 - T67;
Chris@42 1539 T6g = T6c - T6f;
Chris@42 1540 T5X = W[32];
Chris@42 1541 T69 = W[33];
Chris@42 1542 Ip[WS(rs, 8)] = FNMS(T69, T6g, T5X * T68);
Chris@42 1543 Im[WS(rs, 8)] = FMA(T69, T68, T5X * T6g);
Chris@42 1544 }
Chris@42 1545 {
Chris@42 1546 E T6u, T6w, T6t, T6v;
Chris@42 1547 T6u = T6m + T6n;
Chris@42 1548 T6w = T6q + T6r;
Chris@42 1549 T6t = W[16];
Chris@42 1550 T6v = W[17];
Chris@42 1551 Ip[WS(rs, 4)] = FNMS(T6v, T6w, T6t * T6u);
Chris@42 1552 Im[WS(rs, 4)] = FMA(T6t, T6w, T6v * T6u);
Chris@42 1553 }
Chris@42 1554 {
Chris@42 1555 E T6i, T6k, T6h, T6j;
Chris@42 1556 T6i = T60 + T67;
Chris@42 1557 T6k = T6c + T6f;
Chris@42 1558 T6h = W[0];
Chris@42 1559 T6j = W[1];
Chris@42 1560 Ip[0] = FNMS(T6j, T6k, T6h * T6i);
Chris@42 1561 Im[0] = FMA(T6j, T6i, T6h * T6k);
Chris@42 1562 }
Chris@42 1563 {
Chris@42 1564 E T6o, T6s, T6l, T6p;
Chris@42 1565 T6o = T6m - T6n;
Chris@42 1566 T6s = T6q - T6r;
Chris@42 1567 T6l = W[48];
Chris@42 1568 T6p = W[49];
Chris@42 1569 Ip[WS(rs, 12)] = FNMS(T6p, T6s, T6l * T6o);
Chris@42 1570 Im[WS(rs, 12)] = FMA(T6l, T6s, T6p * T6o);
Chris@42 1571 }
Chris@42 1572 }
/* Outputs Ip/Im at offsets 11, 15, 3 and 7. */
Chris@42 1573 {
Chris@42 1574 E T7u, T7Q, T7J, T7R, T7B, T7V, T7G, T7U;
Chris@42 1575 {
Chris@42 1576 E T7s, T7t, T7H, T7I;
Chris@42 1577 T7s = T6y + T6z;
Chris@42 1578 T7t = T73 + T74;
Chris@42 1579 T7u = T7s - T7t;
Chris@42 1580 T7Q = T7s + T7t;
Chris@42 1581 T7H = FMA(KP195090322, T7w, KP980785280 * T7v);
Chris@42 1582 T7I = FMA(KP195090322, T7z, KP980785280 * T7y);
Chris@42 1583 T7J = T7H - T7I;
Chris@42 1584 T7R = T7H + T7I;
Chris@42 1585 }
Chris@42 1586 {
Chris@42 1587 E T7x, T7A, T7E, T7F;
Chris@42 1588 T7x = FNMS(KP980785280, T7w, KP195090322 * T7v);
Chris@42 1589 T7A = FNMS(KP980785280, T7z, KP195090322 * T7y);
Chris@42 1590 T7B = T7x + T7A;
Chris@42 1591 T7V = T7x - T7A;
Chris@42 1592 T7E = T70 - T71;
Chris@42 1593 T7F = T6D - T6G;
Chris@42 1594 T7G = T7E + T7F;
Chris@42 1595 T7U = T7E - T7F;
Chris@42 1596 }
Chris@42 1597 {
Chris@42 1598 E T7C, T7K, T7r, T7D;
Chris@42 1599 T7C = T7u - T7B;
Chris@42 1600 T7K = T7G - T7J;
Chris@42 1601 T7r = W[44];
Chris@42 1602 T7D = W[45];
Chris@42 1603 Ip[WS(rs, 11)] = FNMS(T7D, T7K, T7r * T7C);
Chris@42 1604 Im[WS(rs, 11)] = FMA(T7D, T7C, T7r * T7K);
Chris@42 1605 }
Chris@42 1606 {
Chris@42 1607 E T7Y, T80, T7X, T7Z;
Chris@42 1608 T7Y = T7Q + T7R;
Chris@42 1609 T80 = T7U - T7V;
Chris@42 1610 T7X = W[60];
Chris@42 1611 T7Z = W[61];
Chris@42 1612 Ip[WS(rs, 15)] = FNMS(T7Z, T80, T7X * T7Y);
Chris@42 1613 Im[WS(rs, 15)] = FMA(T7X, T80, T7Z * T7Y);
Chris@42 1614 }
Chris@42 1615 {
Chris@42 1616 E T7M, T7O, T7L, T7N;
Chris@42 1617 T7M = T7u + T7B;
Chris@42 1618 T7O = T7G + T7J;
Chris@42 1619 T7L = W[12];
Chris@42 1620 T7N = W[13];
Chris@42 1621 Ip[WS(rs, 3)] = FNMS(T7N, T7O, T7L * T7M);
Chris@42 1622 Im[WS(rs, 3)] = FMA(T7N, T7M, T7L * T7O);
Chris@42 1623 }
Chris@42 1624 {
Chris@42 1625 E T7S, T7W, T7P, T7T;
Chris@42 1626 T7S = T7Q - T7R;
Chris@42 1627 T7W = T7U + T7V;
Chris@42 1628 T7P = W[28];
Chris@42 1629 T7T = W[29];
Chris@42 1630 Ip[WS(rs, 7)] = FNMS(T7T, T7W, T7P * T7S);
Chris@42 1631 Im[WS(rs, 7)] = FMA(T7P, T7W, T7T * T7S);
Chris@42 1632 }
Chris@42 1633 }
/* Outputs Ip/Im at offsets 10, 6, 2 and 14 (uses the 3pi/16 constants). */
Chris@42 1634 {
Chris@42 1635 E T4M, T5M, T5F, T5N, T5p, T5R, T5C, T5Q;
Chris@42 1636 {
Chris@42 1637 E T4w, T4L, T5D, T5E;
Chris@42 1638 T4w = T4o - T4v;
Chris@42 1639 T4L = T4D - T4K;
Chris@42 1640 T4M = T4w + T4L;
Chris@42 1641 T5M = T4w - T4L;
Chris@42 1642 T5D = FMA(KP831469612, T4X, KP555570233 * T54);
Chris@42 1643 T5E = FNMS(KP831469612, T5g, KP555570233 * T5n);
Chris@42 1644 T5F = T5D + T5E;
Chris@42 1645 T5N = T5E - T5D;
Chris@42 1646 }
Chris@42 1647 {
Chris@42 1648 E T55, T5o, T5y, T5B;
Chris@42 1649 T55 = FNMS(KP831469612, T54, KP555570233 * T4X);
Chris@42 1650 T5o = FMA(KP555570233, T5g, KP831469612 * T5n);
Chris@42 1651 T5p = T55 + T5o;
Chris@42 1652 T5R = T55 - T5o;
Chris@42 1653 T5y = T5u - T5x;
Chris@42 1654 T5B = T5z - T5A;
Chris@42 1655 T5C = T5y + T5B;
Chris@42 1656 T5Q = T5y - T5B;
Chris@42 1657 }
Chris@42 1658 {
Chris@42 1659 E T5q, T5G, T4l, T5r;
Chris@42 1660 T5q = T4M - T5p;
Chris@42 1661 T5G = T5C - T5F;
Chris@42 1662 T4l = W[40];
Chris@42 1663 T5r = W[41];
Chris@42 1664 Ip[WS(rs, 10)] = FNMS(T5r, T5G, T4l * T5q);
Chris@42 1665 Im[WS(rs, 10)] = FMA(T5r, T5q, T4l * T5G);
Chris@42 1666 }
Chris@42 1667 {
Chris@42 1668 E T5U, T5W, T5T, T5V;
Chris@42 1669 T5U = T5M + T5N;
Chris@42 1670 T5W = T5Q + T5R;
Chris@42 1671 T5T = W[24];
Chris@42 1672 T5V = W[25];
Chris@42 1673 Ip[WS(rs, 6)] = FNMS(T5V, T5W, T5T * T5U);
Chris@42 1674 Im[WS(rs, 6)] = FMA(T5T, T5W, T5V * T5U);
Chris@42 1675 }
Chris@42 1676 {
Chris@42 1677 E T5I, T5K, T5H, T5J;
Chris@42 1678 T5I = T4M + T5p;
Chris@42 1679 T5K = T5C + T5F;
Chris@42 1680 T5H = W[8];
Chris@42 1681 T5J = W[9];
Chris@42 1682 Ip[WS(rs, 2)] = FNMS(T5J, T5K, T5H * T5I);
Chris@42 1683 Im[WS(rs, 2)] = FMA(T5J, T5I, T5H * T5K);
Chris@42 1684 }
Chris@42 1685 {
Chris@42 1686 E T5O, T5S, T5L, T5P;
Chris@42 1687 T5O = T5M - T5N;
Chris@42 1688 T5S = T5Q - T5R;
Chris@42 1689 T5L = W[56];
Chris@42 1690 T5P = W[57];
Chris@42 1691 Ip[WS(rs, 14)] = FNMS(T5P, T5S, T5L * T5O);
Chris@42 1692 Im[WS(rs, 14)] = FMA(T5L, T5S, T5P * T5O);
Chris@42 1693 }
Chris@42 1694 }
/* Outputs Ip/Im at offsets 13, 1, 5 and 9. */
Chris@42 1695 {
Chris@42 1696 E T6I, T7g, T79, T7h, T6X, T7l, T76, T7k;
Chris@42 1697 {
Chris@42 1698 E T6A, T6H, T77, T78;
Chris@42 1699 T6A = T6y - T6z;
Chris@42 1700 T6H = T6D + T6G;
Chris@42 1701 T6I = T6A - T6H;
Chris@42 1702 T7g = T6A + T6H;
Chris@42 1703 T77 = FNMS(KP555570233, T6S, KP831469612 * T6V);
Chris@42 1704 T78 = FMA(KP555570233, T6L, KP831469612 * T6O);
Chris@42 1705 T79 = T77 - T78;
Chris@42 1706 T7h = T77 + T78;
Chris@42 1707 }
Chris@42 1708 {
Chris@42 1709 E T6P, T6W, T72, T75;
Chris@42 1710 T6P = FNMS(KP555570233, T6O, KP831469612 * T6L);
Chris@42 1711 T6W = FMA(KP831469612, T6S, KP555570233 * T6V);
Chris@42 1712 T6X = T6P - T6W;
Chris@42 1713 T7l = T6W + T6P;
Chris@42 1714 T72 = T70 + T71;
Chris@42 1715 T75 = T73 - T74;
Chris@42 1716 T76 = T72 - T75;
Chris@42 1717 T7k = T72 + T75;
Chris@42 1718 }
Chris@42 1719 {
Chris@42 1720 E T6Y, T7a, T6x, T6Z;
Chris@42 1721 T6Y = T6I - T6X;
Chris@42 1722 T7a = T76 - T79;
Chris@42 1723 T6x = W[52];
Chris@42 1724 T6Z = W[53];
Chris@42 1725 Ip[WS(rs, 13)] = FNMS(T6Z, T7a, T6x * T6Y);
Chris@42 1726 Im[WS(rs, 13)] = FMA(T6Z, T6Y, T6x * T7a);
Chris@42 1727 }
Chris@42 1728 {
Chris@42 1729 E T7o, T7q, T7n, T7p;
Chris@42 1730 T7o = T7g + T7h;
Chris@42 1731 T7q = T7k + T7l;
Chris@42 1732 T7n = W[4];
Chris@42 1733 T7p = W[5];
Chris@42 1734 Ip[WS(rs, 1)] = FNMS(T7p, T7q, T7n * T7o);
Chris@42 1735 Im[WS(rs, 1)] = FMA(T7n, T7q, T7p * T7o);
Chris@42 1736 }
Chris@42 1737 {
Chris@42 1738 E T7c, T7e, T7b, T7d;
Chris@42 1739 T7c = T6I + T6X;
Chris@42 1740 T7e = T76 + T79;
Chris@42 1741 T7b = W[20];
Chris@42 1742 T7d = W[21];
Chris@42 1743 Ip[WS(rs, 5)] = FNMS(T7d, T7e, T7b * T7c);
Chris@42 1744 Im[WS(rs, 5)] = FMA(T7d, T7c, T7b * T7e);
Chris@42 1745 }
Chris@42 1746 {
Chris@42 1747 E T7i, T7m, T7f, T7j;
Chris@42 1748 T7i = T7g - T7h;
Chris@42 1749 T7m = T7k - T7l;
Chris@42 1750 T7f = W[36];
Chris@42 1751 T7j = W[37];
Chris@42 1752 Ip[WS(rs, 9)] = FNMS(T7j, T7m, T7f * T7i);
Chris@42 1753 Im[WS(rs, 9)] = FMA(T7f, T7m, T7j * T7i);
Chris@42 1754 }
Chris@42 1755 }
Chris@42 1756 }
Chris@42 1757 }
Chris@42 1758 }
Chris@42 1759
/* Twiddle-factor instruction table referenced by the hc2c descriptor below
 * (build without HAVE_FMA).  It holds two entries: {TW_FULL, 1, 32} and
 * {TW_NEXT, 1, 0}.
 * NOTE(review): presumably TW_FULL asks for the full twiddle set of a
 * size-32 transform and TW_NEXT closes the list -- confirm against the
 * tw_instr definition, which is not visible in this file. */
Chris@42 1760 static const tw_instr twinstr[] = {
Chris@42 1761 {TW_FULL, 1, 32},
Chris@42 1762 {TW_NEXT, 1, 0}
Chris@42 1763 };
Chris@42 1764
/* Codelet descriptor for the build without HAVE_FMA: size 32, the name
 * string "hc2cb_32", the twiddle instruction table, a pointer to GENUS, and
 * an operation-count record.  The first three counts {340, 114, 94} match the
 * figures in the generator comment above this branch (340 additions,
 * 114 multiplications, 94 fused multiply/add).
 * NOTE(review): the meaning of the trailing 0 is not shown here -- confirm
 * against the hc2c_desc definition. */
Chris@42 1765 static const hc2c_desc desc = { 32, "hc2cb_32", twinstr, &GENUS, {340, 114, 94, 0} };
Chris@42 1766
/* Registration entry point (build without HAVE_FMA): hands the hc2cb_32
 * kernel and its descriptor to the planner `p` through X(khc2c_register),
 * tagged HC2C_VIA_RDFT.  X() is a name-decorating macro defined outside this
 * file. */
Chris@42 1767 void X(codelet_hc2cb_32) (planner *p) {
Chris@42 1768 X(khc2c_register) (p, hc2cb_32, &desc, HC2C_VIA_RDFT);
Chris@42 1769 }
Chris@42 1770 #endif /* HAVE_FMA */