annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cb_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:53 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
Chris@82 33 * 102 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cb_32 (variant used when FMA is preferred): one fully unrolled
 * radix-32 pass, generated by gen_hc2c with "-n 32 -dif -sign 1".
 *
 * For every m in [mb, me) the body
 *   1. loads 16 values from each of Rp, Ip, Rm and Im (element k is
 *      addressed as X[WS(rs, k)], k = 0..15; 64 loads in total),
 *   2. combines them through a fixed network of additions and
 *      multiply-adds with the trigonometric constants declared below,
 *   3. stores 64 results back into the same four arrays.  Rp[0] and Rm[0]
 *      are stored as-is; each of the other 31 output pairs is first
 *      combined with a pair of values W[2j], W[2j+1] (all of W[0..61] are
 *      used exactly once per iteration).
 * All loads of an iteration are issued before its first store.
 *
 * Rp, Ip : data arrays, advanced by +ms after each iteration.
 * Rm, Im : data arrays, advanced by -ms after each iteration.
 * W      : table of multipliers, 62 values consumed per iteration.
 * rs     : stride handed to WS() to address element k of each array.
 * mb, me : half-open iteration range.
 * ms     : pointer step applied between iterations.
 *
 * NOTE(review): the name suggests "halfcomplex to complex, backward" and
 * the W pairs look like complex twiddle factors (the paired FNMS/FMA stores
 * have the shape of a complex multiply, assuming FMA(a,b,c) = a*b + c and
 * FNMS(a,b,c) = c - a*b) -- confirm against rdft/codelet-rdft.h.
 */
Chris@82 37 static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/*
 * Constants; numerically these are
 *   KP980785280 = cos(pi/16)    KP198912367 = tan(pi/16)
 *   KP831469612 = cos(3*pi/16)  KP668178637 = tan(3*pi/16)
 *   KP923879532 = cos(pi/8)     KP414213562 = tan(pi/8)
 *   KP707106781 = sqrt(1/2)
 */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/*
 * W is first moved to row (mb - 1) of a table holding 62 values per row,
 * then advanced one row per iteration.  Rp/Ip walk forward and Rm/Im walk
 * backward by ms.  MAKE_VOLATILE_STRIDE is a project macro not visible in
 * this file.
 */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
/* Intermediate values that survive from the load stages to the store stages. */
Chris@82 49 E Tf, T5K, T7k, T8k, T7N, T8x, T1i, T3i, T2L, T3v, T4v, T5f, T6m, T6T, T42;
Chris@82 50 E T52, TZ, T6X, T1X, T3p, T8p, T8B, T26, T3o, T4n, T58, T7z, T7T, T4k, T59;
Chris@82 51 E T6a, T6p, TK, T6W, T2o, T3m, T8s, T8A, T2x, T3l, T4g, T55, T7G, T7S, T4d;
Chris@82 52 E T56, T61, T6o, Tu, T6f, T7r, T8y, T7Q, T8l, T1F, T3w, T2O, T3j, T4y, T53;
Chris@82 53 E T5R, T6U, T49, T5g;
/* Load stage 1: elements 0, 4, 8, 12 of Rp/Ip and 3, 7, 11, 15 of Rm/Im. */
Chris@82 54 {
Chris@82 55 E T3, T12, T2G, T6g, T6, T2D, T15, T6h, Td, T6k, T1g, T2J, Ta, T6j, T1b;
Chris@82 56 E T2I;
Chris@82 57 {
Chris@82 58 E T1, T2, T13, T14;
Chris@82 59 T1 = Rp[0];
Chris@82 60 T2 = Rm[WS(rs, 15)];
Chris@82 61 T3 = T1 + T2;
Chris@82 62 T12 = T1 - T2;
Chris@82 63 {
Chris@82 64 E T2E, T2F, T4, T5;
Chris@82 65 T2E = Ip[0];
Chris@82 66 T2F = Im[WS(rs, 15)];
Chris@82 67 T2G = T2E + T2F;
Chris@82 68 T6g = T2E - T2F;
Chris@82 69 T4 = Rp[WS(rs, 8)];
Chris@82 70 T5 = Rm[WS(rs, 7)];
Chris@82 71 T6 = T4 + T5;
Chris@82 72 T2D = T4 - T5;
Chris@82 73 }
Chris@82 74 T13 = Ip[WS(rs, 8)];
Chris@82 75 T14 = Im[WS(rs, 7)];
Chris@82 76 T15 = T13 + T14;
Chris@82 77 T6h = T13 - T14;
Chris@82 78 {
Chris@82 79 E Tb, Tc, T1c, T1d, T1e, T1f;
Chris@82 80 Tb = Rm[WS(rs, 3)];
Chris@82 81 Tc = Rp[WS(rs, 12)];
Chris@82 82 T1c = Tb - Tc;
Chris@82 83 T1d = Ip[WS(rs, 12)];
Chris@82 84 T1e = Im[WS(rs, 3)];
Chris@82 85 T1f = T1d + T1e;
Chris@82 86 Td = Tb + Tc;
Chris@82 87 T6k = T1d - T1e;
Chris@82 88 T1g = T1c - T1f;
Chris@82 89 T2J = T1c + T1f;
Chris@82 90 }
Chris@82 91 {
Chris@82 92 E T8, T9, T17, T18, T19, T1a;
Chris@82 93 T8 = Rp[WS(rs, 4)];
Chris@82 94 T9 = Rm[WS(rs, 11)];
Chris@82 95 T17 = T8 - T9;
Chris@82 96 T18 = Ip[WS(rs, 4)];
Chris@82 97 T19 = Im[WS(rs, 11)];
Chris@82 98 T1a = T18 + T19;
Chris@82 99 Ta = T8 + T9;
Chris@82 100 T6j = T18 - T19;
Chris@82 101 T1b = T17 - T1a;
Chris@82 102 T2I = T17 + T1a;
Chris@82 103 }
Chris@82 104 }
Chris@82 105 {
Chris@82 106 E T7, Te, T7i, T7j;
Chris@82 107 T7 = T3 + T6;
Chris@82 108 Te = Ta + Td;
Chris@82 109 Tf = T7 + Te;
Chris@82 110 T5K = T7 - Te;
Chris@82 111 T7i = T3 - T6;
Chris@82 112 T7j = T6k - T6j;
Chris@82 113 T7k = T7i - T7j;
Chris@82 114 T8k = T7i + T7j;
Chris@82 115 }
Chris@82 116 {
Chris@82 117 E T7L, T7M, T16, T1h;
Chris@82 118 T7L = T6g - T6h;
Chris@82 119 T7M = Ta - Td;
Chris@82 120 T7N = T7L - T7M;
Chris@82 121 T8x = T7M + T7L;
Chris@82 122 T16 = T12 - T15;
Chris@82 123 T1h = T1b + T1g;
Chris@82 124 T1i = FNMS(KP707106781, T1h, T16);
Chris@82 125 T3i = FMA(KP707106781, T1h, T16);
Chris@82 126 }
Chris@82 127 {
Chris@82 128 E T2H, T2K, T4t, T4u;
Chris@82 129 T2H = T2D + T2G;
Chris@82 130 T2K = T2I - T2J;
Chris@82 131 T2L = FNMS(KP707106781, T2K, T2H);
Chris@82 132 T3v = FMA(KP707106781, T2K, T2H);
Chris@82 133 T4t = T2G - T2D;
Chris@82 134 T4u = T1b - T1g;
Chris@82 135 T4v = FMA(KP707106781, T4u, T4t);
Chris@82 136 T5f = FNMS(KP707106781, T4u, T4t);
Chris@82 137 }
Chris@82 138 {
Chris@82 139 E T6i, T6l, T40, T41;
Chris@82 140 T6i = T6g + T6h;
Chris@82 141 T6l = T6j + T6k;
Chris@82 142 T6m = T6i - T6l;
Chris@82 143 T6T = T6i + T6l;
Chris@82 144 T40 = T12 + T15;
Chris@82 145 T41 = T2I + T2J;
Chris@82 146 T42 = FNMS(KP707106781, T41, T40);
Chris@82 147 T52 = FMA(KP707106781, T41, T40);
Chris@82 148 }
Chris@82 149 }
/* Load stage 2: elements 3, 7, 11, 15 of Rp/Ip and 0, 4, 8, 12 of Rm/Im. */
Chris@82 150 {
Chris@82 151 E TR, T7w, T1H, T1Y, T1K, T7t, T21, T65, TY, T7u, T7x, T1Q, T1V, T24, T68;
Chris@82 152 E T23, T7v, T7y;
Chris@82 153 {
Chris@82 154 E TL, TM, TN, TO, TP, TQ;
Chris@82 155 TL = Rm[0];
Chris@82 156 TM = Rp[WS(rs, 15)];
Chris@82 157 TN = TL + TM;
Chris@82 158 TO = Rp[WS(rs, 7)];
Chris@82 159 TP = Rm[WS(rs, 8)];
Chris@82 160 TQ = TO + TP;
Chris@82 161 TR = TN + TQ;
Chris@82 162 T7w = TN - TQ;
Chris@82 163 T1H = TO - TP;
Chris@82 164 T1Y = TL - TM;
Chris@82 165 }
Chris@82 166 {
Chris@82 167 E T1I, T1J, T63, T1Z, T20, T64;
Chris@82 168 T1I = Ip[WS(rs, 15)];
Chris@82 169 T1J = Im[0];
Chris@82 170 T63 = T1I - T1J;
Chris@82 171 T1Z = Ip[WS(rs, 7)];
Chris@82 172 T20 = Im[WS(rs, 8)];
Chris@82 173 T64 = T1Z - T20;
Chris@82 174 T1K = T1I + T1J;
Chris@82 175 T7t = T63 - T64;
Chris@82 176 T21 = T1Z + T20;
Chris@82 177 T65 = T63 + T64;
Chris@82 178 }
Chris@82 179 {
Chris@82 180 E TU, T1M, T1P, T66, TX, T1R, T1U, T67;
Chris@82 181 {
Chris@82 182 E TS, TT, T1N, T1O;
Chris@82 183 TS = Rp[WS(rs, 3)];
Chris@82 184 TT = Rm[WS(rs, 12)];
Chris@82 185 TU = TS + TT;
Chris@82 186 T1M = TS - TT;
Chris@82 187 T1N = Ip[WS(rs, 3)];
Chris@82 188 T1O = Im[WS(rs, 12)];
Chris@82 189 T1P = T1N + T1O;
Chris@82 190 T66 = T1N - T1O;
Chris@82 191 }
Chris@82 192 {
Chris@82 193 E TV, TW, T1S, T1T;
Chris@82 194 TV = Rm[WS(rs, 4)];
Chris@82 195 TW = Rp[WS(rs, 11)];
Chris@82 196 TX = TV + TW;
Chris@82 197 T1R = TV - TW;
Chris@82 198 T1S = Ip[WS(rs, 11)];
Chris@82 199 T1T = Im[WS(rs, 4)];
Chris@82 200 T1U = T1S + T1T;
Chris@82 201 T67 = T1S - T1T;
Chris@82 202 }
Chris@82 203 TY = TU + TX;
Chris@82 204 T7u = TU - TX;
Chris@82 205 T7x = T67 - T66;
Chris@82 206 T1Q = T1M + T1P;
Chris@82 207 T1V = T1R + T1U;
Chris@82 208 T24 = T1R - T1U;
Chris@82 209 T68 = T66 + T67;
Chris@82 210 T23 = T1M - T1P;
Chris@82 211 }
Chris@82 212 TZ = TR + TY;
Chris@82 213 T6X = T65 + T68;
Chris@82 214 {
Chris@82 215 E T1L, T1W, T8n, T8o;
Chris@82 216 T1L = T1H - T1K;
Chris@82 217 T1W = T1Q - T1V;
Chris@82 218 T1X = FNMS(KP707106781, T1W, T1L);
Chris@82 219 T3p = FMA(KP707106781, T1W, T1L);
Chris@82 220 T8n = T7u + T7t;
Chris@82 221 T8o = T7w + T7x;
Chris@82 222 T8p = FNMS(KP414213562, T8o, T8n);
Chris@82 223 T8B = FMA(KP414213562, T8n, T8o);
Chris@82 224 }
Chris@82 225 {
Chris@82 226 E T22, T25, T4l, T4m;
Chris@82 227 T22 = T1Y - T21;
Chris@82 228 T25 = T23 + T24;
Chris@82 229 T26 = FNMS(KP707106781, T25, T22);
Chris@82 230 T3o = FMA(KP707106781, T25, T22);
Chris@82 231 T4l = T1H + T1K;
Chris@82 232 T4m = T23 - T24;
Chris@82 233 T4n = FNMS(KP707106781, T4m, T4l);
Chris@82 234 T58 = FMA(KP707106781, T4m, T4l);
Chris@82 235 }
Chris@82 236 T7v = T7t - T7u;
Chris@82 237 T7y = T7w - T7x;
Chris@82 238 T7z = FMA(KP414213562, T7y, T7v);
Chris@82 239 T7T = FNMS(KP414213562, T7v, T7y);
Chris@82 240 {
Chris@82 241 E T4i, T4j, T62, T69;
Chris@82 242 T4i = T1Y + T21;
Chris@82 243 T4j = T1Q + T1V;
Chris@82 244 T4k = FNMS(KP707106781, T4j, T4i);
Chris@82 245 T59 = FMA(KP707106781, T4j, T4i);
Chris@82 246 T62 = TR - TY;
Chris@82 247 T69 = T65 - T68;
Chris@82 248 T6a = T62 + T69;
Chris@82 249 T6p = T69 - T62;
Chris@82 250 }
Chris@82 251 }
/* Load stage 3: elements 1, 5, 9, 13 of Rp/Ip and 2, 6, 10, 14 of Rm/Im. */
Chris@82 252 {
Chris@82 253 E TC, T7D, T28, T2p, T2b, T7A, T2s, T5W, TJ, T7B, T7E, T2h, T2m, T2v, T5Z;
Chris@82 254 E T2u, T7C, T7F;
Chris@82 255 {
Chris@82 256 E Tw, Tx, Ty, Tz, TA, TB;
Chris@82 257 Tw = Rp[WS(rs, 1)];
Chris@82 258 Tx = Rm[WS(rs, 14)];
Chris@82 259 Ty = Tw + Tx;
Chris@82 260 Tz = Rp[WS(rs, 9)];
Chris@82 261 TA = Rm[WS(rs, 6)];
Chris@82 262 TB = Tz + TA;
Chris@82 263 TC = Ty + TB;
Chris@82 264 T7D = Ty - TB;
Chris@82 265 T28 = Tz - TA;
Chris@82 266 T2p = Tw - Tx;
Chris@82 267 }
Chris@82 268 {
Chris@82 269 E T29, T2a, T5U, T2q, T2r, T5V;
Chris@82 270 T29 = Ip[WS(rs, 1)];
Chris@82 271 T2a = Im[WS(rs, 14)];
Chris@82 272 T5U = T29 - T2a;
Chris@82 273 T2q = Ip[WS(rs, 9)];
Chris@82 274 T2r = Im[WS(rs, 6)];
Chris@82 275 T5V = T2q - T2r;
Chris@82 276 T2b = T29 + T2a;
Chris@82 277 T7A = T5U - T5V;
Chris@82 278 T2s = T2q + T2r;
Chris@82 279 T5W = T5U + T5V;
Chris@82 280 }
Chris@82 281 {
Chris@82 282 E TF, T2d, T2g, T5X, TI, T2i, T2l, T5Y;
Chris@82 283 {
Chris@82 284 E TD, TE, T2e, T2f;
Chris@82 285 TD = Rp[WS(rs, 5)];
Chris@82 286 TE = Rm[WS(rs, 10)];
Chris@82 287 TF = TD + TE;
Chris@82 288 T2d = TD - TE;
Chris@82 289 T2e = Ip[WS(rs, 5)];
Chris@82 290 T2f = Im[WS(rs, 10)];
Chris@82 291 T2g = T2e + T2f;
Chris@82 292 T5X = T2e - T2f;
Chris@82 293 }
Chris@82 294 {
Chris@82 295 E TG, TH, T2j, T2k;
Chris@82 296 TG = Rm[WS(rs, 2)];
Chris@82 297 TH = Rp[WS(rs, 13)];
Chris@82 298 TI = TG + TH;
Chris@82 299 T2i = TG - TH;
Chris@82 300 T2j = Ip[WS(rs, 13)];
Chris@82 301 T2k = Im[WS(rs, 2)];
Chris@82 302 T2l = T2j + T2k;
Chris@82 303 T5Y = T2j - T2k;
Chris@82 304 }
Chris@82 305 TJ = TF + TI;
Chris@82 306 T7B = TF - TI;
Chris@82 307 T7E = T5Y - T5X;
Chris@82 308 T2h = T2d + T2g;
Chris@82 309 T2m = T2i + T2l;
Chris@82 310 T2v = T2i - T2l;
Chris@82 311 T5Z = T5X + T5Y;
Chris@82 312 T2u = T2d - T2g;
Chris@82 313 }
Chris@82 314 TK = TC + TJ;
Chris@82 315 T6W = T5W + T5Z;
Chris@82 316 {
Chris@82 317 E T2c, T2n, T8q, T8r;
Chris@82 318 T2c = T28 + T2b;
Chris@82 319 T2n = T2h - T2m;
Chris@82 320 T2o = FNMS(KP707106781, T2n, T2c);
Chris@82 321 T3m = FMA(KP707106781, T2n, T2c);
Chris@82 322 T8q = T7B + T7A;
Chris@82 323 T8r = T7D + T7E;
Chris@82 324 T8s = FMA(KP414213562, T8r, T8q);
Chris@82 325 T8A = FNMS(KP414213562, T8q, T8r);
Chris@82 326 }
Chris@82 327 {
Chris@82 328 E T2t, T2w, T4e, T4f;
Chris@82 329 T2t = T2p - T2s;
Chris@82 330 T2w = T2u + T2v;
Chris@82 331 T2x = FNMS(KP707106781, T2w, T2t);
Chris@82 332 T3l = FMA(KP707106781, T2w, T2t);
Chris@82 333 T4e = T2b - T28;
Chris@82 334 T4f = T2v - T2u;
Chris@82 335 T4g = FNMS(KP707106781, T4f, T4e);
Chris@82 336 T55 = FMA(KP707106781, T4f, T4e);
Chris@82 337 }
Chris@82 338 T7C = T7A - T7B;
Chris@82 339 T7F = T7D - T7E;
Chris@82 340 T7G = FNMS(KP414213562, T7F, T7C);
Chris@82 341 T7S = FMA(KP414213562, T7C, T7F);
Chris@82 342 {
Chris@82 343 E T4b, T4c, T5T, T60;
Chris@82 344 T4b = T2p + T2s;
Chris@82 345 T4c = T2h + T2m;
Chris@82 346 T4d = FNMS(KP707106781, T4c, T4b);
Chris@82 347 T56 = FMA(KP707106781, T4c, T4b);
Chris@82 348 T5T = TC - TJ;
Chris@82 349 T60 = T5W - T5Z;
Chris@82 350 T61 = T5T - T60;
Chris@82 351 T6o = T5T + T60;
Chris@82 352 }
Chris@82 353 }
/* Load stage 4: elements 2, 6, 10, 14 of Rp/Ip and 1, 5, 9, 13 of Rm/Im. */
Chris@82 354 {
Chris@82 355 E Ti, T5O, Tl, T5P, T1y, T1D, T7p, T7o, T44, T43, Tp, T5L, Ts, T5M, T1n;
Chris@82 356 E T1s, T7m, T7l, T47, T46;
Chris@82 357 {
Chris@82 358 E T1z, T1x, T1u, T1C;
Chris@82 359 {
Chris@82 360 E Tg, Th, T1v, T1w;
Chris@82 361 Tg = Rp[WS(rs, 2)];
Chris@82 362 Th = Rm[WS(rs, 13)];
Chris@82 363 Ti = Tg + Th;
Chris@82 364 T1z = Tg - Th;
Chris@82 365 T1v = Ip[WS(rs, 2)];
Chris@82 366 T1w = Im[WS(rs, 13)];
Chris@82 367 T1x = T1v + T1w;
Chris@82 368 T5O = T1v - T1w;
Chris@82 369 }
Chris@82 370 {
Chris@82 371 E Tj, Tk, T1A, T1B;
Chris@82 372 Tj = Rp[WS(rs, 10)];
Chris@82 373 Tk = Rm[WS(rs, 5)];
Chris@82 374 Tl = Tj + Tk;
Chris@82 375 T1u = Tj - Tk;
Chris@82 376 T1A = Ip[WS(rs, 10)];
Chris@82 377 T1B = Im[WS(rs, 5)];
Chris@82 378 T1C = T1A + T1B;
Chris@82 379 T5P = T1A - T1B;
Chris@82 380 }
Chris@82 381 T1y = T1u + T1x;
Chris@82 382 T1D = T1z - T1C;
Chris@82 383 T7p = T5O - T5P;
Chris@82 384 T7o = Ti - Tl;
Chris@82 385 T44 = T1z + T1C;
Chris@82 386 T43 = T1x - T1u;
Chris@82 387 }
Chris@82 388 {
Chris@82 389 E T1o, T1m, T1j, T1r;
Chris@82 390 {
Chris@82 391 E Tn, To, T1k, T1l;
Chris@82 392 Tn = Rm[WS(rs, 1)];
Chris@82 393 To = Rp[WS(rs, 14)];
Chris@82 394 Tp = Tn + To;
Chris@82 395 T1o = Tn - To;
Chris@82 396 T1k = Ip[WS(rs, 14)];
Chris@82 397 T1l = Im[WS(rs, 1)];
Chris@82 398 T1m = T1k + T1l;
Chris@82 399 T5L = T1k - T1l;
Chris@82 400 }
Chris@82 401 {
Chris@82 402 E Tq, Tr, T1p, T1q;
Chris@82 403 Tq = Rp[WS(rs, 6)];
Chris@82 404 Tr = Rm[WS(rs, 9)];
Chris@82 405 Ts = Tq + Tr;
Chris@82 406 T1j = Tq - Tr;
Chris@82 407 T1p = Ip[WS(rs, 6)];
Chris@82 408 T1q = Im[WS(rs, 9)];
Chris@82 409 T1r = T1p + T1q;
Chris@82 410 T5M = T1p - T1q;
Chris@82 411 }
Chris@82 412 T1n = T1j - T1m;
Chris@82 413 T1s = T1o - T1r;
Chris@82 414 T7m = Tp - Ts;
Chris@82 415 T7l = T5L - T5M;
Chris@82 416 T47 = T1o + T1r;
Chris@82 417 T46 = T1j + T1m;
Chris@82 418 }
Chris@82 419 {
Chris@82 420 E Tm, Tt, T7n, T7q;
Chris@82 421 Tm = Ti + Tl;
Chris@82 422 Tt = Tp + Ts;
Chris@82 423 Tu = Tm + Tt;
Chris@82 424 T6f = Tm - Tt;
Chris@82 425 T7n = T7l - T7m;
Chris@82 426 T7q = T7o + T7p;
Chris@82 427 T7r = T7n - T7q;
Chris@82 428 T8y = T7q + T7n;
Chris@82 429 }
Chris@82 430 {
Chris@82 431 E T7O, T7P, T1t, T1E;
Chris@82 432 T7O = T7o - T7p;
Chris@82 433 T7P = T7m + T7l;
Chris@82 434 T7Q = T7O - T7P;
Chris@82 435 T8l = T7O + T7P;
Chris@82 436 T1t = FNMS(KP414213562, T1s, T1n);
Chris@82 437 T1E = FMA(KP414213562, T1D, T1y);
Chris@82 438 T1F = T1t - T1E;
Chris@82 439 T3w = T1E + T1t;
Chris@82 440 }
Chris@82 441 {
Chris@82 442 E T2M, T2N, T4w, T4x;
Chris@82 443 T2M = FNMS(KP414213562, T1y, T1D);
Chris@82 444 T2N = FMA(KP414213562, T1n, T1s);
Chris@82 445 T2O = T2M - T2N;
Chris@82 446 T3j = T2M + T2N;
Chris@82 447 T4w = FMA(KP414213562, T43, T44);
Chris@82 448 T4x = FMA(KP414213562, T46, T47);
Chris@82 449 T4y = T4w - T4x;
Chris@82 450 T53 = T4w + T4x;
Chris@82 451 }
Chris@82 452 {
Chris@82 453 E T5N, T5Q, T45, T48;
Chris@82 454 T5N = T5L + T5M;
Chris@82 455 T5Q = T5O + T5P;
Chris@82 456 T5R = T5N - T5Q;
Chris@82 457 T6U = T5Q + T5N;
Chris@82 458 T45 = FNMS(KP414213562, T44, T43);
Chris@82 459 T48 = FNMS(KP414213562, T47, T46);
Chris@82 460 T49 = T45 + T48;
Chris@82 461 T5g = T48 - T45;
Chris@82 462 }
Chris@82 463 }
/*
 * Store stages.  All inputs have been read by this point.  The blocks up
 * to the next marker write every element of Rp/Rm; Rp[0] and Rm[0] are the
 * only outputs that are not combined with values from W.
 */
Chris@82 464 {
Chris@82 465 E Tv, T10, T6Q, T6V, T6Y, T6Z;
Chris@82 466 Tv = Tf + Tu;
Chris@82 467 T10 = TK + TZ;
Chris@82 468 T6Q = Tv - T10;
Chris@82 469 T6V = T6T + T6U;
Chris@82 470 T6Y = T6W + T6X;
Chris@82 471 T6Z = T6V - T6Y;
Chris@82 472 Rp[0] = Tv + T10;
Chris@82 473 Rm[0] = T6V + T6Y;
Chris@82 474 {
Chris@82 475 E T6P, T6R, T6S, T70;
Chris@82 476 T6P = W[30];
Chris@82 477 T6R = T6P * T6Q;
Chris@82 478 T6S = W[31];
Chris@82 479 T70 = T6S * T6Q;
Chris@82 480 Rp[WS(rs, 8)] = FNMS(T6S, T6Z, T6R);
Chris@82 481 Rm[WS(rs, 8)] = FMA(T6P, T6Z, T70);
Chris@82 482 }
Chris@82 483 }
Chris@82 484 {
Chris@82 485 E T8O, T8W, T8T, T8Z;
Chris@82 486 {
Chris@82 487 E T8M, T8N, T8R, T8S;
Chris@82 488 T8M = FMA(KP707106781, T8l, T8k);
Chris@82 489 T8N = T8A + T8B;
Chris@82 490 T8O = FNMS(KP923879532, T8N, T8M);
Chris@82 491 T8W = FMA(KP923879532, T8N, T8M);
Chris@82 492 T8R = FMA(KP707106781, T8y, T8x);
Chris@82 493 T8S = T8s + T8p;
Chris@82 494 T8T = FNMS(KP923879532, T8S, T8R);
Chris@82 495 T8Z = FMA(KP923879532, T8S, T8R);
Chris@82 496 }
Chris@82 497 {
Chris@82 498 E T8P, T8U, T8L, T8Q;
Chris@82 499 T8L = W[34];
Chris@82 500 T8P = T8L * T8O;
Chris@82 501 T8U = T8L * T8T;
Chris@82 502 T8Q = W[35];
Chris@82 503 Rp[WS(rs, 9)] = FNMS(T8Q, T8T, T8P);
Chris@82 504 Rm[WS(rs, 9)] = FMA(T8Q, T8O, T8U);
Chris@82 505 }
Chris@82 506 {
Chris@82 507 E T8X, T90, T8V, T8Y;
Chris@82 508 T8V = W[2];
Chris@82 509 T8X = T8V * T8W;
Chris@82 510 T90 = T8V * T8Z;
Chris@82 511 T8Y = W[3];
Chris@82 512 Rp[WS(rs, 1)] = FNMS(T8Y, T8Z, T8X);
Chris@82 513 Rm[WS(rs, 1)] = FMA(T8Y, T8W, T90);
Chris@82 514 }
Chris@82 515 }
Chris@82 516 {
Chris@82 517 E T86, T8e, T8b, T8h;
Chris@82 518 {
Chris@82 519 E T84, T85, T89, T8a;
Chris@82 520 T84 = FNMS(KP707106781, T7r, T7k);
Chris@82 521 T85 = T7S + T7T;
Chris@82 522 T86 = FNMS(KP923879532, T85, T84);
Chris@82 523 T8e = FMA(KP923879532, T85, T84);
Chris@82 524 T89 = FNMS(KP707106781, T7Q, T7N);
Chris@82 525 T8a = T7G + T7z;
Chris@82 526 T8b = FNMS(KP923879532, T8a, T89);
Chris@82 527 T8h = FMA(KP923879532, T8a, T89);
Chris@82 528 }
Chris@82 529 {
Chris@82 530 E T87, T8c, T83, T88;
Chris@82 531 T83 = W[26];
Chris@82 532 T87 = T83 * T86;
Chris@82 533 T8c = T83 * T8b;
Chris@82 534 T88 = W[27];
Chris@82 535 Rp[WS(rs, 7)] = FNMS(T88, T8b, T87);
Chris@82 536 Rm[WS(rs, 7)] = FMA(T88, T86, T8c);
Chris@82 537 }
Chris@82 538 {
Chris@82 539 E T8f, T8i, T8d, T8g;
Chris@82 540 T8d = W[58];
Chris@82 541 T8f = T8d * T8e;
Chris@82 542 T8i = T8d * T8h;
Chris@82 543 T8g = W[59];
Chris@82 544 Rp[WS(rs, 15)] = FNMS(T8g, T8h, T8f);
Chris@82 545 Rm[WS(rs, 15)] = FMA(T8g, T8e, T8i);
Chris@82 546 }
Chris@82 547 }
Chris@82 548 {
Chris@82 549 E T6C, T6K, T6H, T6N;
Chris@82 550 {
Chris@82 551 E T6A, T6B, T6F, T6G;
Chris@82 552 T6A = T5K - T5R;
Chris@82 553 T6B = T6p - T6o;
Chris@82 554 T6C = FNMS(KP707106781, T6B, T6A);
Chris@82 555 T6K = FMA(KP707106781, T6B, T6A);
Chris@82 556 T6F = T6m - T6f;
Chris@82 557 T6G = T61 - T6a;
Chris@82 558 T6H = FNMS(KP707106781, T6G, T6F);
Chris@82 559 T6N = FMA(KP707106781, T6G, T6F);
Chris@82 560 }
Chris@82 561 {
Chris@82 562 E T6D, T6I, T6z, T6E;
Chris@82 563 T6z = W[54];
Chris@82 564 T6D = T6z * T6C;
Chris@82 565 T6I = T6z * T6H;
Chris@82 566 T6E = W[55];
Chris@82 567 Rp[WS(rs, 14)] = FNMS(T6E, T6H, T6D);
Chris@82 568 Rm[WS(rs, 14)] = FMA(T6E, T6C, T6I);
Chris@82 569 }
Chris@82 570 {
Chris@82 571 E T6L, T6O, T6J, T6M;
Chris@82 572 T6J = W[22];
Chris@82 573 T6L = T6J * T6K;
Chris@82 574 T6O = T6J * T6N;
Chris@82 575 T6M = W[23];
Chris@82 576 Rp[WS(rs, 6)] = FNMS(T6M, T6N, T6L);
Chris@82 577 Rm[WS(rs, 6)] = FMA(T6M, T6K, T6O);
Chris@82 578 }
Chris@82 579 }
Chris@82 580 {
Chris@82 581 E T8u, T8G, T8D, T8J;
Chris@82 582 {
Chris@82 583 E T8m, T8t, T8z, T8C;
Chris@82 584 T8m = FNMS(KP707106781, T8l, T8k);
Chris@82 585 T8t = T8p - T8s;
Chris@82 586 T8u = FNMS(KP923879532, T8t, T8m);
Chris@82 587 T8G = FMA(KP923879532, T8t, T8m);
Chris@82 588 T8z = FNMS(KP707106781, T8y, T8x);
Chris@82 589 T8C = T8A - T8B;
Chris@82 590 T8D = FNMS(KP923879532, T8C, T8z);
Chris@82 591 T8J = FMA(KP923879532, T8C, T8z);
Chris@82 592 }
Chris@82 593 {
Chris@82 594 E T8j, T8v, T8w, T8E;
Chris@82 595 T8j = W[50];
Chris@82 596 T8v = T8j * T8u;
Chris@82 597 T8w = W[51];
Chris@82 598 T8E = T8w * T8u;
Chris@82 599 Rp[WS(rs, 13)] = FNMS(T8w, T8D, T8v);
Chris@82 600 Rm[WS(rs, 13)] = FMA(T8j, T8D, T8E);
Chris@82 601 }
Chris@82 602 {
Chris@82 603 E T8F, T8H, T8I, T8K;
Chris@82 604 T8F = W[18];
Chris@82 605 T8H = T8F * T8G;
Chris@82 606 T8I = W[19];
Chris@82 607 T8K = T8I * T8G;
Chris@82 608 Rp[WS(rs, 5)] = FNMS(T8I, T8J, T8H);
Chris@82 609 Rm[WS(rs, 5)] = FMA(T8F, T8J, T8K);
Chris@82 610 }
Chris@82 611 }
Chris@82 612 {
Chris@82 613 E T6c, T6u, T6r, T6x;
Chris@82 614 {
Chris@82 615 E T5S, T6b, T6n, T6q;
Chris@82 616 T5S = T5K + T5R;
Chris@82 617 T6b = T61 + T6a;
Chris@82 618 T6c = FNMS(KP707106781, T6b, T5S);
Chris@82 619 T6u = FMA(KP707106781, T6b, T5S);
Chris@82 620 T6n = T6f + T6m;
Chris@82 621 T6q = T6o + T6p;
Chris@82 622 T6r = FNMS(KP707106781, T6q, T6n);
Chris@82 623 T6x = FMA(KP707106781, T6q, T6n);
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T5J, T6d, T6e, T6s;
Chris@82 627 T5J = W[38];
Chris@82 628 T6d = T5J * T6c;
Chris@82 629 T6e = W[39];
Chris@82 630 T6s = T6e * T6c;
Chris@82 631 Rp[WS(rs, 10)] = FNMS(T6e, T6r, T6d);
Chris@82 632 Rm[WS(rs, 10)] = FMA(T5J, T6r, T6s);
Chris@82 633 }
Chris@82 634 {
Chris@82 635 E T6t, T6v, T6w, T6y;
Chris@82 636 T6t = W[6];
Chris@82 637 T6v = T6t * T6u;
Chris@82 638 T6w = W[7];
Chris@82 639 T6y = T6w * T6u;
Chris@82 640 Rp[WS(rs, 2)] = FNMS(T6w, T6x, T6v);
Chris@82 641 Rm[WS(rs, 2)] = FMA(T6t, T6x, T6y);
Chris@82 642 }
Chris@82 643 }
Chris@82 644 {
Chris@82 645 E T74, T7c, T79, T7f;
Chris@82 646 {
Chris@82 647 E T72, T73, T77, T78;
Chris@82 648 T72 = Tf - Tu;
Chris@82 649 T73 = T6X - T6W;
Chris@82 650 T74 = T72 - T73;
Chris@82 651 T7c = T72 + T73;
Chris@82 652 T77 = T6T - T6U;
Chris@82 653 T78 = TK - TZ;
Chris@82 654 T79 = T77 - T78;
Chris@82 655 T7f = T78 + T77;
Chris@82 656 }
Chris@82 657 {
Chris@82 658 E T75, T7a, T71, T76;
Chris@82 659 T71 = W[46];
Chris@82 660 T75 = T71 * T74;
Chris@82 661 T7a = T71 * T79;
Chris@82 662 T76 = W[47];
Chris@82 663 Rp[WS(rs, 12)] = FNMS(T76, T79, T75);
Chris@82 664 Rm[WS(rs, 12)] = FMA(T76, T74, T7a);
Chris@82 665 }
Chris@82 666 {
Chris@82 667 E T7d, T7g, T7b, T7e;
Chris@82 668 T7b = W[14];
Chris@82 669 T7d = T7b * T7c;
Chris@82 670 T7g = T7b * T7f;
Chris@82 671 T7e = W[15];
Chris@82 672 Rp[WS(rs, 4)] = FNMS(T7e, T7f, T7d);
Chris@82 673 Rm[WS(rs, 4)] = FMA(T7e, T7c, T7g);
Chris@82 674 }
Chris@82 675 }
Chris@82 676 {
Chris@82 677 E T7I, T7Y, T7V, T81;
Chris@82 678 {
Chris@82 679 E T7s, T7H, T7R, T7U;
Chris@82 680 T7s = FMA(KP707106781, T7r, T7k);
Chris@82 681 T7H = T7z - T7G;
Chris@82 682 T7I = FNMS(KP923879532, T7H, T7s);
Chris@82 683 T7Y = FMA(KP923879532, T7H, T7s);
Chris@82 684 T7R = FMA(KP707106781, T7Q, T7N);
Chris@82 685 T7U = T7S - T7T;
Chris@82 686 T7V = FNMS(KP923879532, T7U, T7R);
Chris@82 687 T81 = FMA(KP923879532, T7U, T7R);
Chris@82 688 }
Chris@82 689 {
Chris@82 690 E T7h, T7J, T7K, T7W;
Chris@82 691 T7h = W[42];
Chris@82 692 T7J = T7h * T7I;
Chris@82 693 T7K = W[43];
Chris@82 694 T7W = T7K * T7I;
Chris@82 695 Rp[WS(rs, 11)] = FNMS(T7K, T7V, T7J);
Chris@82 696 Rm[WS(rs, 11)] = FMA(T7h, T7V, T7W);
Chris@82 697 }
Chris@82 698 {
Chris@82 699 E T7X, T7Z, T80, T82;
Chris@82 700 T7X = W[10];
Chris@82 701 T7Z = T7X * T7Y;
Chris@82 702 T80 = W[11];
Chris@82 703 T82 = T80 * T7Y;
Chris@82 704 Rp[WS(rs, 3)] = FNMS(T80, T81, T7Z);
Chris@82 705 Rm[WS(rs, 3)] = FMA(T7X, T81, T82);
Chris@82 706 }
Chris@82 707 }
/* Rp/Rm are complete; the remaining four blocks write every element of Ip/Im. */
Chris@82 708 {
Chris@82 709 E T37, T2A, T38, T2W, T2T, T3c, T2Z, T34;
Chris@82 710 T37 = FNMS(KP923879532, T2O, T2L);
Chris@82 711 {
Chris@82 712 E T1G, T27, T2y, T2z;
Chris@82 713 T1G = FMA(KP923879532, T1F, T1i);
Chris@82 714 T27 = FMA(KP668178637, T26, T1X);
Chris@82 715 T2y = FNMS(KP668178637, T2x, T2o);
Chris@82 716 T2z = T27 - T2y;
Chris@82 717 T2A = FNMS(KP831469612, T2z, T1G);
Chris@82 718 T38 = T2y + T27;
Chris@82 719 T2W = FMA(KP831469612, T2z, T1G);
Chris@82 720 }
Chris@82 721 {
Chris@82 722 E T2P, T32, T2S, T33, T2Q, T2R;
Chris@82 723 T2P = FMA(KP923879532, T2O, T2L);
Chris@82 724 T32 = FNMS(KP923879532, T1F, T1i);
Chris@82 725 T2Q = FMA(KP668178637, T2o, T2x);
Chris@82 726 T2R = FNMS(KP668178637, T1X, T26);
Chris@82 727 T2S = T2Q - T2R;
Chris@82 728 T33 = T2Q + T2R;
Chris@82 729 T2T = FNMS(KP831469612, T2S, T2P);
Chris@82 730 T3c = FMA(KP831469612, T33, T32);
Chris@82 731 T2Z = FMA(KP831469612, T2S, T2P);
Chris@82 732 T34 = FNMS(KP831469612, T33, T32);
Chris@82 733 }
Chris@82 734 {
Chris@82 735 E T2B, T2U, T11, T2C;
Chris@82 736 T11 = W[40];
Chris@82 737 T2B = T11 * T2A;
Chris@82 738 T2U = T11 * T2T;
Chris@82 739 T2C = W[41];
Chris@82 740 Ip[WS(rs, 10)] = FNMS(T2C, T2T, T2B);
Chris@82 741 Im[WS(rs, 10)] = FMA(T2C, T2A, T2U);
Chris@82 742 }
Chris@82 743 {
Chris@82 744 E T2X, T30, T2V, T2Y;
Chris@82 745 T2V = W[8];
Chris@82 746 T2X = T2V * T2W;
Chris@82 747 T30 = T2V * T2Z;
Chris@82 748 T2Y = W[9];
Chris@82 749 Ip[WS(rs, 2)] = FNMS(T2Y, T2Z, T2X);
Chris@82 750 Im[WS(rs, 2)] = FMA(T2Y, T2W, T30);
Chris@82 751 }
Chris@82 752 {
Chris@82 753 E T39, T36, T3a, T31, T35;
Chris@82 754 T39 = FNMS(KP831469612, T38, T37);
Chris@82 755 T36 = W[25];
Chris@82 756 T3a = T36 * T34;
Chris@82 757 T31 = W[24];
Chris@82 758 T35 = T31 * T34;
Chris@82 759 Ip[WS(rs, 6)] = FNMS(T36, T39, T35);
Chris@82 760 Im[WS(rs, 6)] = FMA(T31, T39, T3a);
Chris@82 761 }
Chris@82 762 {
Chris@82 763 E T3f, T3e, T3g, T3b, T3d;
Chris@82 764 T3f = FMA(KP831469612, T38, T37);
Chris@82 765 T3e = W[57];
Chris@82 766 T3g = T3e * T3c;
Chris@82 767 T3b = W[56];
Chris@82 768 T3d = T3b * T3c;
Chris@82 769 Ip[WS(rs, 14)] = FNMS(T3e, T3f, T3d);
Chris@82 770 Im[WS(rs, 14)] = FMA(T3b, T3f, T3g);
Chris@82 771 }
Chris@82 772 }
Chris@82 773 {
Chris@82 774 E T4z, T4C, T4W, T4O, T4q, T4Z, T4G, T4T;
Chris@82 775 T4z = FMA(KP923879532, T4y, T4v);
Chris@82 776 {
Chris@82 777 E T4M, T4A, T4B, T4N;
Chris@82 778 T4M = FMA(KP923879532, T49, T42);
Chris@82 779 T4A = FMA(KP668178637, T4d, T4g);
Chris@82 780 T4B = FMA(KP668178637, T4k, T4n);
Chris@82 781 T4N = T4A + T4B;
Chris@82 782 T4C = T4A - T4B;
Chris@82 783 T4W = FMA(KP831469612, T4N, T4M);
Chris@82 784 T4O = FNMS(KP831469612, T4N, T4M);
Chris@82 785 }
Chris@82 786 {
Chris@82 787 E T4a, T4R, T4p, T4S, T4h, T4o;
Chris@82 788 T4a = FNMS(KP923879532, T49, T42);
Chris@82 789 T4R = FNMS(KP923879532, T4y, T4v);
Chris@82 790 T4h = FNMS(KP668178637, T4g, T4d);
Chris@82 791 T4o = FNMS(KP668178637, T4n, T4k);
Chris@82 792 T4p = T4h + T4o;
Chris@82 793 T4S = T4h - T4o;
Chris@82 794 T4q = FNMS(KP831469612, T4p, T4a);
Chris@82 795 T4Z = FNMS(KP831469612, T4S, T4R);
Chris@82 796 T4G = FMA(KP831469612, T4p, T4a);
Chris@82 797 T4T = FMA(KP831469612, T4S, T4R);
Chris@82 798 }
Chris@82 799 {
Chris@82 800 E T4P, T4U, T4L, T4Q;
Chris@82 801 T4L = W[20];
Chris@82 802 T4P = T4L * T4O;
Chris@82 803 T4U = T4L * T4T;
Chris@82 804 T4Q = W[21];
Chris@82 805 Ip[WS(rs, 5)] = FNMS(T4Q, T4T, T4P);
Chris@82 806 Im[WS(rs, 5)] = FMA(T4Q, T4O, T4U);
Chris@82 807 }
Chris@82 808 {
Chris@82 809 E T4X, T50, T4V, T4Y;
Chris@82 810 T4V = W[52];
Chris@82 811 T4X = T4V * T4W;
Chris@82 812 T50 = T4V * T4Z;
Chris@82 813 T4Y = W[53];
Chris@82 814 Ip[WS(rs, 13)] = FNMS(T4Y, T4Z, T4X);
Chris@82 815 Im[WS(rs, 13)] = FMA(T4Y, T4W, T50);
Chris@82 816 }
Chris@82 817 {
Chris@82 818 E T4D, T4s, T4E, T3Z, T4r;
Chris@82 819 T4D = FNMS(KP831469612, T4C, T4z);
Chris@82 820 T4s = W[37];
Chris@82 821 T4E = T4s * T4q;
Chris@82 822 T3Z = W[36];
Chris@82 823 T4r = T3Z * T4q;
Chris@82 824 Ip[WS(rs, 9)] = FNMS(T4s, T4D, T4r);
Chris@82 825 Im[WS(rs, 9)] = FMA(T3Z, T4D, T4E);
Chris@82 826 }
Chris@82 827 {
Chris@82 828 E T4J, T4I, T4K, T4F, T4H;
Chris@82 829 T4J = FMA(KP831469612, T4C, T4z);
Chris@82 830 T4I = W[5];
Chris@82 831 T4K = T4I * T4G;
Chris@82 832 T4F = W[4];
Chris@82 833 T4H = T4F * T4G;
Chris@82 834 Ip[WS(rs, 1)] = FNMS(T4I, T4J, T4H);
Chris@82 835 Im[WS(rs, 1)] = FMA(T4F, T4J, T4K);
Chris@82 836 }
Chris@82 837 }
Chris@82 838 {
Chris@82 839 E T3x, T3A, T3U, T3M, T3s, T3X, T3E, T3R;
Chris@82 840 T3x = FMA(KP923879532, T3w, T3v);
Chris@82 841 {
Chris@82 842 E T3K, T3y, T3z, T3L;
Chris@82 843 T3K = FNMS(KP923879532, T3j, T3i);
Chris@82 844 T3y = FMA(KP198912367, T3l, T3m);
Chris@82 845 T3z = FNMS(KP198912367, T3o, T3p);
Chris@82 846 T3L = T3z - T3y;
Chris@82 847 T3A = T3y + T3z;
Chris@82 848 T3U = FMA(KP980785280, T3L, T3K);
Chris@82 849 T3M = FNMS(KP980785280, T3L, T3K);
Chris@82 850 }
Chris@82 851 {
Chris@82 852 E T3k, T3P, T3r, T3Q, T3n, T3q;
Chris@82 853 T3k = FMA(KP923879532, T3j, T3i);
Chris@82 854 T3P = FNMS(KP923879532, T3w, T3v);
Chris@82 855 T3n = FNMS(KP198912367, T3m, T3l);
Chris@82 856 T3q = FMA(KP198912367, T3p, T3o);
Chris@82 857 T3r = T3n + T3q;
Chris@82 858 T3Q = T3n - T3q;
Chris@82 859 T3s = FNMS(KP980785280, T3r, T3k);
Chris@82 860 T3X = FMA(KP980785280, T3Q, T3P);
Chris@82 861 T3E = FMA(KP980785280, T3r, T3k);
Chris@82 862 T3R = FNMS(KP980785280, T3Q, T3P);
Chris@82 863 }
Chris@82 864 {
Chris@82 865 E T3N, T3S, T3J, T3O;
Chris@82 866 T3J = W[48];
Chris@82 867 T3N = T3J * T3M;
Chris@82 868 T3S = T3J * T3R;
Chris@82 869 T3O = W[49];
Chris@82 870 Ip[WS(rs, 12)] = FNMS(T3O, T3R, T3N);
Chris@82 871 Im[WS(rs, 12)] = FMA(T3O, T3M, T3S);
Chris@82 872 }
Chris@82 873 {
Chris@82 874 E T3V, T3Y, T3T, T3W;
Chris@82 875 T3T = W[16];
Chris@82 876 T3V = T3T * T3U;
Chris@82 877 T3Y = T3T * T3X;
Chris@82 878 T3W = W[17];
Chris@82 879 Ip[WS(rs, 4)] = FNMS(T3W, T3X, T3V);
Chris@82 880 Im[WS(rs, 4)] = FMA(T3W, T3U, T3Y);
Chris@82 881 }
Chris@82 882 {
Chris@82 883 E T3B, T3u, T3C, T3h, T3t;
Chris@82 884 T3B = FNMS(KP980785280, T3A, T3x);
Chris@82 885 T3u = W[33];
Chris@82 886 T3C = T3u * T3s;
Chris@82 887 T3h = W[32];
Chris@82 888 T3t = T3h * T3s;
Chris@82 889 Ip[WS(rs, 8)] = FNMS(T3u, T3B, T3t);
Chris@82 890 Im[WS(rs, 8)] = FMA(T3h, T3B, T3C);
Chris@82 891 }
Chris@82 892 {
Chris@82 893 E T3H, T3G, T3I, T3D, T3F;
Chris@82 894 T3H = FMA(KP980785280, T3A, T3x);
Chris@82 895 T3G = W[1];
Chris@82 896 T3I = T3G * T3E;
Chris@82 897 T3D = W[0];
Chris@82 898 T3F = T3D * T3E;
Chris@82 899 Ip[0] = FNMS(T3G, T3H, T3F);
Chris@82 900 Im[0] = FMA(T3D, T3H, T3I);
Chris@82 901 }
Chris@82 902 }
Chris@82 903 {
Chris@82 904 E T5h, T5k, T5E, T5w, T5c, T5H, T5o, T5B;
Chris@82 905 T5h = FMA(KP923879532, T5g, T5f);
Chris@82 906 {
Chris@82 907 E T5u, T5i, T5j, T5v;
Chris@82 908 T5u = FMA(KP923879532, T53, T52);
Chris@82 909 T5i = FMA(KP198912367, T55, T56);
Chris@82 910 T5j = FMA(KP198912367, T58, T59);
Chris@82 911 T5v = T5i + T5j;
Chris@82 912 T5k = T5i - T5j;
Chris@82 913 T5E = FMA(KP980785280, T5v, T5u);
Chris@82 914 T5w = FNMS(KP980785280, T5v, T5u);
Chris@82 915 }
Chris@82 916 {
Chris@82 917 E T54, T5z, T5b, T5A, T57, T5a;
Chris@82 918 T54 = FNMS(KP923879532, T53, T52);
Chris@82 919 T5z = FNMS(KP923879532, T5g, T5f);
Chris@82 920 T57 = FNMS(KP198912367, T56, T55);
Chris@82 921 T5a = FNMS(KP198912367, T59, T58);
Chris@82 922 T5b = T57 + T5a;
Chris@82 923 T5A = T5a - T57;
Chris@82 924 T5c = FMA(KP980785280, T5b, T54);
Chris@82 925 T5H = FNMS(KP980785280, T5A, T5z);
Chris@82 926 T5o = FNMS(KP980785280, T5b, T54);
Chris@82 927 T5B = FMA(KP980785280, T5A, T5z);
Chris@82 928 }
Chris@82 929 {
Chris@82 930 E T5x, T5C, T5t, T5y;
Chris@82 931 T5t = W[28];
Chris@82 932 T5x = T5t * T5w;
Chris@82 933 T5C = T5t * T5B;
Chris@82 934 T5y = W[29];
Chris@82 935 Ip[WS(rs, 7)] = FNMS(T5y, T5B, T5x);
Chris@82 936 Im[WS(rs, 7)] = FMA(T5y, T5w, T5C);
Chris@82 937 }
Chris@82 938 {
Chris@82 939 E T5F, T5I, T5D, T5G;
Chris@82 940 T5D = W[60];
Chris@82 941 T5F = T5D * T5E;
Chris@82 942 T5I = T5D * T5H;
Chris@82 943 T5G = W[61];
Chris@82 944 Ip[WS(rs, 15)] = FNMS(T5G, T5H, T5F);
Chris@82 945 Im[WS(rs, 15)] = FMA(T5G, T5E, T5I);
Chris@82 946 }
Chris@82 947 {
Chris@82 948 E T5l, T5e, T5m, T51, T5d;
Chris@82 949 T5l = FNMS(KP980785280, T5k, T5h);
Chris@82 950 T5e = W[45];
Chris@82 951 T5m = T5e * T5c;
Chris@82 952 T51 = W[44];
Chris@82 953 T5d = T51 * T5c;
Chris@82 954 Ip[WS(rs, 11)] = FNMS(T5e, T5l, T5d);
Chris@82 955 Im[WS(rs, 11)] = FMA(T51, T5l, T5m);
Chris@82 956 }
Chris@82 957 {
Chris@82 958 E T5r, T5q, T5s, T5n, T5p;
Chris@82 959 T5r = FMA(KP980785280, T5k, T5h);
Chris@82 960 T5q = W[13];
Chris@82 961 T5s = T5q * T5o;
Chris@82 962 T5n = W[12];
Chris@82 963 T5p = T5n * T5o;
Chris@82 964 Ip[WS(rs, 3)] = FNMS(T5q, T5r, T5p);
Chris@82 965 Im[WS(rs, 3)] = FMA(T5n, T5r, T5s);
Chris@82 966 }
Chris@82 967 }
Chris@82 968 }
Chris@82 969 }
Chris@82 970 }
Chris@82 971
/* Twiddle-instruction table for the FMA build of this codelet; it is handed
 * to the planner through `desc` below.  It holds a TW_FULL entry carrying the
 * radix 32, followed by a TW_NEXT entry.
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for
 * radix 32 and TW_NEXT terminates the list -- confirm against the tw_instr
 * definition in the project headers. */
Chris@82 972 static const tw_instr twinstr[] = {
Chris@82 973 {TW_FULL, 1, 32},
Chris@82 974 {TW_NEXT, 1, 0}
Chris@82 975 };
Chris@82 976
/* Codelet descriptor for the FMA build: radix 32, the name string "hc2cb_32",
 * the twiddle-instruction table, the GENUS object, and an operation-count
 * tuple.  The first three counts (236, 62, 198) are the additions,
 * multiplications and fused multiply/adds quoted in the generator comment at
 * the top of this branch.
 * NOTE(review): the meaning of the trailing 0 is not visible here -- check the
 * hc2c_desc / opcnt definition. */
Chris@82 977 static const hc2c_desc desc = { 32, "hc2cb_32", twinstr, &GENUS, {236, 62, 198, 0} };
Chris@82 978
/* Registration entry point (FMA build): passes the kernel function
 * hc2cb_32, its descriptor and the HC2C_VIA_RDFT flag to
 * X(khc2c_register) for planner `p`. */
Chris@82 979 void X(codelet_hc2cb_32) (planner *p) {
Chris@82 980 X(khc2c_register) (p, hc2cb_32, &desc, HC2C_VIA_RDFT);
Chris@82 981 }
Chris@82 982 #else
Chris@82 983
Chris@82 984 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include rdft/scalar/hc2cb.h */
Chris@82 985
Chris@82 986 /*
Chris@82 987 * This function contains 434 FP additions, 208 FP multiplications,
Chris@82 988 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
Chris@82 989 * 98 stack variables, 7 constants, and 128 memory accesses
Chris@82 990 */
Chris@82 991 #include "rdft/scalar/hc2cb.h"
Chris@82 992
/*
 * hc2cb_32 (non-FMA build): generated radix-32 kernel.  Per the generator
 * command line above it was produced with "-sign 1 -n 32 -dif".
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  data arrays, read and then overwritten in place.  Each
 *                   iteration touches elements WS(rs, 0) .. WS(rs, 15) of
 *                   every array (64 loads followed by 64 stores).
 *   W               twiddle table; 62 values are consumed per iteration.
 *                   On entry it is advanced by (mb - 1) * 62.
 *   rs              stride argument passed to WS() for element addressing.
 *   mb, me          the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms              per-iteration step: Rp and Ip move forward by ms,
 *                   Rm and Im move backward by ms.
 *
 * All loads of an iteration are issued before its first store, so the
 * in-place update does not read values already overwritten in that iteration.
 *
 * NOTE(review): FMA(a, b, c) / FNMS(a, b, c) are project macros, presumably
 * a*b + c and c - a*b; WS(rs, k) is presumably the offset of element k for
 * stride rs; E and R are the project's floating-point types -- confirm
 * against the included headers.
 */
Chris@82 993 static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 994 {
/* Trigonometric constants: sin/cos of 3*pi/16, sin/cos of pi/16,
 * cos/sin of pi/8, and sqrt(1/2). */
Chris@82 995 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 996 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 997 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 998 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 999 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 1000 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 1001 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1002 {
Chris@82 1003 INT m;
/* One radix-32 butterfly per iteration.  The twiddle pointer is first moved
 * to the block for m = mb, then advances by 62 values each pass.
 * MAKE_VOLATILE_STRIDE is an opaque project macro evaluated in the
 * increment clause. */
Chris@82 1004 for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@82 1005 E T4o, T6y, T70, T5u, Tf, T12, T5x, T6z, T3m, T3Y, T29, T2y, T4v, T71, T2U;
Chris@82 1006 E T3M, Tu, T1U, T6D, T73, T6G, T74, T1h, T2z, T2X, T3o, T4D, T5A, T4K, T5z;
Chris@82 1007 E T30, T3n, TK, T1j, T6S, T7w, T6V, T7v, T1y, T2B, T3c, T3S, T4X, T61, T54;
Chris@82 1008 E T62, T3f, T3T, TZ, T1A, T6L, T7z, T6O, T7y, T1P, T2C, T35, T3P, T5g, T64;
Chris@82 1009 E T5n, T65, T38, T3Q;
/* Load group 1: Rp/Ip elements 0, 8, 4, 12 paired with Rm/Im elements
 * 15, 7, 11, 3; forms sums and differences of each pair and combines them. */
Chris@82 1010 {
Chris@82 1011 E T3, T4m, T1X, T5t, T6, T5s, T20, T4n, Ta, T4p, T24, T4q, Td, T4s, T27;
Chris@82 1012 E T4t;
Chris@82 1013 {
Chris@82 1014 E T1, T2, T1V, T1W;
Chris@82 1015 T1 = Rp[0];
Chris@82 1016 T2 = Rm[WS(rs, 15)];
Chris@82 1017 T3 = T1 + T2;
Chris@82 1018 T4m = T1 - T2;
Chris@82 1019 T1V = Ip[0];
Chris@82 1020 T1W = Im[WS(rs, 15)];
Chris@82 1021 T1X = T1V - T1W;
Chris@82 1022 T5t = T1V + T1W;
Chris@82 1023 }
Chris@82 1024 {
Chris@82 1025 E T4, T5, T1Y, T1Z;
Chris@82 1026 T4 = Rp[WS(rs, 8)];
Chris@82 1027 T5 = Rm[WS(rs, 7)];
Chris@82 1028 T6 = T4 + T5;
Chris@82 1029 T5s = T4 - T5;
Chris@82 1030 T1Y = Ip[WS(rs, 8)];
Chris@82 1031 T1Z = Im[WS(rs, 7)];
Chris@82 1032 T20 = T1Y - T1Z;
Chris@82 1033 T4n = T1Y + T1Z;
Chris@82 1034 }
Chris@82 1035 {
Chris@82 1036 E T8, T9, T22, T23;
Chris@82 1037 T8 = Rp[WS(rs, 4)];
Chris@82 1038 T9 = Rm[WS(rs, 11)];
Chris@82 1039 Ta = T8 + T9;
Chris@82 1040 T4p = T8 - T9;
Chris@82 1041 T22 = Ip[WS(rs, 4)];
Chris@82 1042 T23 = Im[WS(rs, 11)];
Chris@82 1043 T24 = T22 - T23;
Chris@82 1044 T4q = T22 + T23;
Chris@82 1045 }
Chris@82 1046 {
Chris@82 1047 E Tb, Tc, T25, T26;
Chris@82 1048 Tb = Rm[WS(rs, 3)];
Chris@82 1049 Tc = Rp[WS(rs, 12)];
Chris@82 1050 Td = Tb + Tc;
Chris@82 1051 T4s = Tb - Tc;
Chris@82 1052 T25 = Ip[WS(rs, 12)];
Chris@82 1053 T26 = Im[WS(rs, 3)];
Chris@82 1054 T27 = T25 - T26;
Chris@82 1055 T4t = T25 + T26;
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T7, Te, T21, T28;
Chris@82 1059 T4o = T4m - T4n;
Chris@82 1060 T6y = T4m + T4n;
Chris@82 1061 T70 = T5t - T5s;
Chris@82 1062 T5u = T5s + T5t;
Chris@82 1063 T7 = T3 + T6;
Chris@82 1064 Te = Ta + Td;
Chris@82 1065 Tf = T7 + Te;
Chris@82 1066 T12 = T7 - Te;
Chris@82 1067 {
Chris@82 1068 E T5v, T5w, T3k, T3l;
Chris@82 1069 T5v = T4p + T4q;
Chris@82 1070 T5w = T4s + T4t;
Chris@82 1071 T5x = KP707106781 * (T5v - T5w);
Chris@82 1072 T6z = KP707106781 * (T5v + T5w);
Chris@82 1073 T3k = T1X - T20;
Chris@82 1074 T3l = Ta - Td;
Chris@82 1075 T3m = T3k - T3l;
Chris@82 1076 T3Y = T3l + T3k;
Chris@82 1077 }
Chris@82 1078 T21 = T1X + T20;
Chris@82 1079 T28 = T24 + T27;
Chris@82 1080 T29 = T21 - T28;
Chris@82 1081 T2y = T21 + T28;
Chris@82 1082 {
Chris@82 1083 E T4r, T4u, T2S, T2T;
Chris@82 1084 T4r = T4p - T4q;
Chris@82 1085 T4u = T4s - T4t;
Chris@82 1086 T4v = KP707106781 * (T4r + T4u);
Chris@82 1087 T71 = KP707106781 * (T4r - T4u);
Chris@82 1088 T2S = T3 - T6;
Chris@82 1089 T2T = T27 - T24;
Chris@82 1090 T2U = T2S - T2T;
Chris@82 1091 T3M = T2S + T2T;
Chris@82 1092 }
Chris@82 1093 }
Chris@82 1094 }
/* Load group 2: Rp/Ip elements 2, 10, 14, 6 paired with Rm/Im elements
 * 13, 5, 1, 9; results are rotated using the pi/8 constants. */
Chris@82 1095 {
Chris@82 1096 E Ti, T4H, T1c, T4F, Tl, T4E, T1f, T4I, Tp, T4A, T15, T4y, Ts, T4x, T18;
Chris@82 1097 E T4B;
Chris@82 1098 {
Chris@82 1099 E Tg, Th, T1a, T1b;
Chris@82 1100 Tg = Rp[WS(rs, 2)];
Chris@82 1101 Th = Rm[WS(rs, 13)];
Chris@82 1102 Ti = Tg + Th;
Chris@82 1103 T4H = Tg - Th;
Chris@82 1104 T1a = Ip[WS(rs, 2)];
Chris@82 1105 T1b = Im[WS(rs, 13)];
Chris@82 1106 T1c = T1a - T1b;
Chris@82 1107 T4F = T1a + T1b;
Chris@82 1108 }
Chris@82 1109 {
Chris@82 1110 E Tj, Tk, T1d, T1e;
Chris@82 1111 Tj = Rp[WS(rs, 10)];
Chris@82 1112 Tk = Rm[WS(rs, 5)];
Chris@82 1113 Tl = Tj + Tk;
Chris@82 1114 T4E = Tj - Tk;
Chris@82 1115 T1d = Ip[WS(rs, 10)];
Chris@82 1116 T1e = Im[WS(rs, 5)];
Chris@82 1117 T1f = T1d - T1e;
Chris@82 1118 T4I = T1d + T1e;
Chris@82 1119 }
Chris@82 1120 {
Chris@82 1121 E Tn, To, T13, T14;
Chris@82 1122 Tn = Rm[WS(rs, 1)];
Chris@82 1123 To = Rp[WS(rs, 14)];
Chris@82 1124 Tp = Tn + To;
Chris@82 1125 T4A = Tn - To;
Chris@82 1126 T13 = Ip[WS(rs, 14)];
Chris@82 1127 T14 = Im[WS(rs, 1)];
Chris@82 1128 T15 = T13 - T14;
Chris@82 1129 T4y = T13 + T14;
Chris@82 1130 }
Chris@82 1131 {
Chris@82 1132 E Tq, Tr, T16, T17;
Chris@82 1133 Tq = Rp[WS(rs, 6)];
Chris@82 1134 Tr = Rm[WS(rs, 9)];
Chris@82 1135 Ts = Tq + Tr;
Chris@82 1136 T4x = Tq - Tr;
Chris@82 1137 T16 = Ip[WS(rs, 6)];
Chris@82 1138 T17 = Im[WS(rs, 9)];
Chris@82 1139 T18 = T16 - T17;
Chris@82 1140 T4B = T16 + T17;
Chris@82 1141 }
Chris@82 1142 {
Chris@82 1143 E Tm, Tt, T6B, T6C;
Chris@82 1144 Tm = Ti + Tl;
Chris@82 1145 Tt = Tp + Ts;
Chris@82 1146 Tu = Tm + Tt;
Chris@82 1147 T1U = Tm - Tt;
Chris@82 1148 T6B = T4H + T4I;
Chris@82 1149 T6C = T4F - T4E;
Chris@82 1150 T6D = FNMS(KP923879532, T6C, KP382683432 * T6B);
Chris@82 1151 T73 = FMA(KP382683432, T6C, KP923879532 * T6B);
Chris@82 1152 }
Chris@82 1153 {
Chris@82 1154 E T6E, T6F, T19, T1g;
Chris@82 1155 T6E = T4A + T4B;
Chris@82 1156 T6F = T4x + T4y;
Chris@82 1157 T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);
Chris@82 1158 T74 = FMA(KP382683432, T6F, KP923879532 * T6E);
Chris@82 1159 T19 = T15 + T18;
Chris@82 1160 T1g = T1c + T1f;
Chris@82 1161 T1h = T19 - T1g;
Chris@82 1162 T2z = T1g + T19;
Chris@82 1163 }
Chris@82 1164 {
Chris@82 1165 E T2V, T2W, T4z, T4C;
Chris@82 1166 T2V = T15 - T18;
Chris@82 1167 T2W = Tp - Ts;
Chris@82 1168 T2X = T2V - T2W;
Chris@82 1169 T3o = T2W + T2V;
Chris@82 1170 T4z = T4x - T4y;
Chris@82 1171 T4C = T4A - T4B;
Chris@82 1172 T4D = FNMS(KP382683432, T4C, KP923879532 * T4z);
Chris@82 1173 T5A = FMA(KP382683432, T4z, KP923879532 * T4C);
Chris@82 1174 }
Chris@82 1175 {
Chris@82 1176 E T4G, T4J, T2Y, T2Z;
Chris@82 1177 T4G = T4E + T4F;
Chris@82 1178 T4J = T4H - T4I;
Chris@82 1179 T4K = FMA(KP923879532, T4G, KP382683432 * T4J);
Chris@82 1180 T5z = FNMS(KP382683432, T4G, KP923879532 * T4J);
Chris@82 1181 T2Y = Ti - Tl;
Chris@82 1182 T2Z = T1c - T1f;
Chris@82 1183 T30 = T2Y + T2Z;
Chris@82 1184 T3n = T2Y - T2Z;
Chris@82 1185 }
Chris@82 1186 }
/* Load group 3: Rp/Ip elements 1, 9, 13, 5 paired with Rm/Im elements
 * 14, 6, 2, 10. */
Chris@82 1187 {
Chris@82 1188 E Ty, T4N, T1m, T4Z, TB, T4Y, T1p, T4O, TI, T52, T1w, T4V, TF, T51, T1t;
Chris@82 1189 E T4S;
Chris@82 1190 {
Chris@82 1191 E Tw, Tx, T1n, T1o;
Chris@82 1192 Tw = Rp[WS(rs, 1)];
Chris@82 1193 Tx = Rm[WS(rs, 14)];
Chris@82 1194 Ty = Tw + Tx;
Chris@82 1195 T4N = Tw - Tx;
Chris@82 1196 {
Chris@82 1197 E T1k, T1l, Tz, TA;
Chris@82 1198 T1k = Ip[WS(rs, 1)];
Chris@82 1199 T1l = Im[WS(rs, 14)];
Chris@82 1200 T1m = T1k - T1l;
Chris@82 1201 T4Z = T1k + T1l;
Chris@82 1202 Tz = Rp[WS(rs, 9)];
Chris@82 1203 TA = Rm[WS(rs, 6)];
Chris@82 1204 TB = Tz + TA;
Chris@82 1205 T4Y = Tz - TA;
Chris@82 1206 }
Chris@82 1207 T1n = Ip[WS(rs, 9)];
Chris@82 1208 T1o = Im[WS(rs, 6)];
Chris@82 1209 T1p = T1n - T1o;
Chris@82 1210 T4O = T1n + T1o;
Chris@82 1211 {
Chris@82 1212 E TG, TH, T4T, T1u, T1v, T4U;
Chris@82 1213 TG = Rm[WS(rs, 2)];
Chris@82 1214 TH = Rp[WS(rs, 13)];
Chris@82 1215 T4T = TG - TH;
Chris@82 1216 T1u = Ip[WS(rs, 13)];
Chris@82 1217 T1v = Im[WS(rs, 2)];
Chris@82 1218 T4U = T1u + T1v;
Chris@82 1219 TI = TG + TH;
Chris@82 1220 T52 = T4T + T4U;
Chris@82 1221 T1w = T1u - T1v;
Chris@82 1222 T4V = T4T - T4U;
Chris@82 1223 }
Chris@82 1224 {
Chris@82 1225 E TD, TE, T4Q, T1r, T1s, T4R;
Chris@82 1226 TD = Rp[WS(rs, 5)];
Chris@82 1227 TE = Rm[WS(rs, 10)];
Chris@82 1228 T4Q = TD - TE;
Chris@82 1229 T1r = Ip[WS(rs, 5)];
Chris@82 1230 T1s = Im[WS(rs, 10)];
Chris@82 1231 T4R = T1r + T1s;
Chris@82 1232 TF = TD + TE;
Chris@82 1233 T51 = T4Q + T4R;
Chris@82 1234 T1t = T1r - T1s;
Chris@82 1235 T4S = T4Q - T4R;
Chris@82 1236 }
Chris@82 1237 }
Chris@82 1238 {
Chris@82 1239 E TC, TJ, T6Q, T6R;
Chris@82 1240 TC = Ty + TB;
Chris@82 1241 TJ = TF + TI;
Chris@82 1242 TK = TC + TJ;
Chris@82 1243 T1j = TC - TJ;
Chris@82 1244 T6Q = T4Z - T4Y;
Chris@82 1245 T6R = KP707106781 * (T4S - T4V);
Chris@82 1246 T6S = T6Q + T6R;
Chris@82 1247 T7w = T6Q - T6R;
Chris@82 1248 }
Chris@82 1249 {
Chris@82 1250 E T6T, T6U, T1q, T1x;
Chris@82 1251 T6T = T4N + T4O;
Chris@82 1252 T6U = KP707106781 * (T51 + T52);
Chris@82 1253 T6V = T6T - T6U;
Chris@82 1254 T7v = T6T + T6U;
Chris@82 1255 T1q = T1m + T1p;
Chris@82 1256 T1x = T1t + T1w;
Chris@82 1257 T1y = T1q - T1x;
Chris@82 1258 T2B = T1q + T1x;
Chris@82 1259 }
Chris@82 1260 {
Chris@82 1261 E T3a, T3b, T4P, T4W;
Chris@82 1262 T3a = T1m - T1p;
Chris@82 1263 T3b = TF - TI;
Chris@82 1264 T3c = T3a - T3b;
Chris@82 1265 T3S = T3b + T3a;
Chris@82 1266 T4P = T4N - T4O;
Chris@82 1267 T4W = KP707106781 * (T4S + T4V);
Chris@82 1268 T4X = T4P - T4W;
Chris@82 1269 T61 = T4P + T4W;
Chris@82 1270 }
Chris@82 1271 {
Chris@82 1272 E T50, T53, T3d, T3e;
Chris@82 1273 T50 = T4Y + T4Z;
Chris@82 1274 T53 = KP707106781 * (T51 - T52);
Chris@82 1275 T54 = T50 - T53;
Chris@82 1276 T62 = T50 + T53;
Chris@82 1277 T3d = Ty - TB;
Chris@82 1278 T3e = T1w - T1t;
Chris@82 1279 T3f = T3d - T3e;
Chris@82 1280 T3T = T3d + T3e;
Chris@82 1281 }
Chris@82 1282 }
/* Load group 4: Rp/Ip elements 15, 7, 11, 3 paired with Rm/Im elements
 * 0, 8, 4, 12.  This is the last group of loads; everything after it
 * works on locals until the stores. */
Chris@82 1283 {
Chris@82 1284 E TN, T56, T1D, T5i, TQ, T5h, T1G, T57, TX, T5l, T1N, T5e, TU, T5k, T1K;
Chris@82 1285 E T5b;
Chris@82 1286 {
Chris@82 1287 E TL, TM, T1E, T1F;
Chris@82 1288 TL = Rm[0];
Chris@82 1289 TM = Rp[WS(rs, 15)];
Chris@82 1290 TN = TL + TM;
Chris@82 1291 T56 = TL - TM;
Chris@82 1292 {
Chris@82 1293 E T1B, T1C, TO, TP;
Chris@82 1294 T1B = Ip[WS(rs, 15)];
Chris@82 1295 T1C = Im[0];
Chris@82 1296 T1D = T1B - T1C;
Chris@82 1297 T5i = T1B + T1C;
Chris@82 1298 TO = Rp[WS(rs, 7)];
Chris@82 1299 TP = Rm[WS(rs, 8)];
Chris@82 1300 TQ = TO + TP;
Chris@82 1301 T5h = TO - TP;
Chris@82 1302 }
Chris@82 1303 T1E = Ip[WS(rs, 7)];
Chris@82 1304 T1F = Im[WS(rs, 8)];
Chris@82 1305 T1G = T1E - T1F;
Chris@82 1306 T57 = T1E + T1F;
Chris@82 1307 {
Chris@82 1308 E TV, TW, T5c, T1L, T1M, T5d;
Chris@82 1309 TV = Rm[WS(rs, 4)];
Chris@82 1310 TW = Rp[WS(rs, 11)];
Chris@82 1311 T5c = TV - TW;
Chris@82 1312 T1L = Ip[WS(rs, 11)];
Chris@82 1313 T1M = Im[WS(rs, 4)];
Chris@82 1314 T5d = T1L + T1M;
Chris@82 1315 TX = TV + TW;
Chris@82 1316 T5l = T5c + T5d;
Chris@82 1317 T1N = T1L - T1M;
Chris@82 1318 T5e = T5c - T5d;
Chris@82 1319 }
Chris@82 1320 {
Chris@82 1321 E TS, TT, T59, T1I, T1J, T5a;
Chris@82 1322 TS = Rp[WS(rs, 3)];
Chris@82 1323 TT = Rm[WS(rs, 12)];
Chris@82 1324 T59 = TS - TT;
Chris@82 1325 T1I = Ip[WS(rs, 3)];
Chris@82 1326 T1J = Im[WS(rs, 12)];
Chris@82 1327 T5a = T1I + T1J;
Chris@82 1328 TU = TS + TT;
Chris@82 1329 T5k = T59 + T5a;
Chris@82 1330 T1K = T1I - T1J;
Chris@82 1331 T5b = T59 - T5a;
Chris@82 1332 }
Chris@82 1333 }
Chris@82 1334 {
Chris@82 1335 E TR, TY, T6J, T6K;
Chris@82 1336 TR = TN + TQ;
Chris@82 1337 TY = TU + TX;
Chris@82 1338 TZ = TR + TY;
Chris@82 1339 T1A = TR - TY;
Chris@82 1340 T6J = KP707106781 * (T5b - T5e);
Chris@82 1341 T6K = T5h + T5i;
Chris@82 1342 T6L = T6J - T6K;
Chris@82 1343 T7z = T6K + T6J;
Chris@82 1344 }
Chris@82 1345 {
Chris@82 1346 E T6M, T6N, T1H, T1O;
Chris@82 1347 T6M = T56 + T57;
Chris@82 1348 T6N = KP707106781 * (T5k + T5l);
Chris@82 1349 T6O = T6M - T6N;
Chris@82 1350 T7y = T6M + T6N;
Chris@82 1351 T1H = T1D + T1G;
Chris@82 1352 T1O = T1K + T1N;
Chris@82 1353 T1P = T1H - T1O;
Chris@82 1354 T2C = T1H + T1O;
Chris@82 1355 }
Chris@82 1356 {
Chris@82 1357 E T33, T34, T58, T5f;
Chris@82 1358 T33 = T1D - T1G;
Chris@82 1359 T34 = TU - TX;
Chris@82 1360 T35 = T33 - T34;
Chris@82 1361 T3P = T34 + T33;
Chris@82 1362 T58 = T56 - T57;
Chris@82 1363 T5f = KP707106781 * (T5b + T5e);
Chris@82 1364 T5g = T58 - T5f;
Chris@82 1365 T64 = T58 + T5f;
Chris@82 1366 }
Chris@82 1367 {
Chris@82 1368 E T5j, T5m, T36, T37;
Chris@82 1369 T5j = T5h - T5i;
Chris@82 1370 T5m = KP707106781 * (T5k - T5l);
Chris@82 1371 T5n = T5j - T5m;
Chris@82 1372 T65 = T5j + T5m;
Chris@82 1373 T36 = TN - TQ;
Chris@82 1374 T37 = T1N - T1K;
Chris@82 1375 T38 = T36 - T37;
Chris@82 1376 T3Q = T36 + T37;
Chris@82 1377 }
Chris@82 1378 }
/* Store stage, part 1: Rp/Rm outputs.  Rp[0] and Rm[0] are written without
 * a twiddle; every other pair Rp[k], Rm[k] (k = 1..15) is combined with the
 * two consecutive table entries W[4k - 2] and W[4k - 1].
 * Elements 0 and 8 first. */
Chris@82 1379 {
Chris@82 1380 E Tv, T10, T2w, T2A, T2D, T2E, T2v, T2x;
Chris@82 1381 Tv = Tf + Tu;
Chris@82 1382 T10 = TK + TZ;
Chris@82 1383 T2w = Tv - T10;
Chris@82 1384 T2A = T2y + T2z;
Chris@82 1385 T2D = T2B + T2C;
Chris@82 1386 T2E = T2A - T2D;
Chris@82 1387 Rp[0] = Tv + T10;
Chris@82 1388 Rm[0] = T2A + T2D;
Chris@82 1389 T2v = W[30];
Chris@82 1390 T2x = W[31];
Chris@82 1391 Rp[WS(rs, 8)] = FNMS(T2x, T2E, T2v * T2w);
Chris@82 1392 Rm[WS(rs, 8)] = FMA(T2x, T2w, T2v * T2E);
Chris@82 1393 }
/* Rp/Rm elements 12 and 4. */
Chris@82 1394 {
Chris@82 1395 E T2I, T2O, T2M, T2Q;
Chris@82 1396 {
Chris@82 1397 E T2G, T2H, T2K, T2L;
Chris@82 1398 T2G = Tf - Tu;
Chris@82 1399 T2H = T2C - T2B;
Chris@82 1400 T2I = T2G - T2H;
Chris@82 1401 T2O = T2G + T2H;
Chris@82 1402 T2K = T2y - T2z;
Chris@82 1403 T2L = TK - TZ;
Chris@82 1404 T2M = T2K - T2L;
Chris@82 1405 T2Q = T2L + T2K;
Chris@82 1406 }
Chris@82 1407 {
Chris@82 1408 E T2F, T2J, T2N, T2P;
Chris@82 1409 T2F = W[46];
Chris@82 1410 T2J = W[47];
Chris@82 1411 Rp[WS(rs, 12)] = FNMS(T2J, T2M, T2F * T2I);
Chris@82 1412 Rm[WS(rs, 12)] = FMA(T2F, T2M, T2J * T2I);
Chris@82 1413 T2N = W[14];
Chris@82 1414 T2P = W[15];
Chris@82 1415 Rp[WS(rs, 4)] = FNMS(T2P, T2Q, T2N * T2O);
Chris@82 1416 Rm[WS(rs, 4)] = FMA(T2N, T2Q, T2P * T2O);
Chris@82 1417 }
Chris@82 1418 }
/* Rp/Rm elements 10, 6, 2, 14. */
Chris@82 1419 {
Chris@82 1420 E T1i, T2a, T2o, T2k, T2d, T2l, T1R, T2p;
Chris@82 1421 T1i = T12 + T1h;
Chris@82 1422 T2a = T1U + T29;
Chris@82 1423 T2o = T29 - T1U;
Chris@82 1424 T2k = T12 - T1h;
Chris@82 1425 {
Chris@82 1426 E T2b, T2c, T1z, T1Q;
Chris@82 1427 T2b = T1j + T1y;
Chris@82 1428 T2c = T1P - T1A;
Chris@82 1429 T2d = KP707106781 * (T2b + T2c);
Chris@82 1430 T2l = KP707106781 * (T2c - T2b);
Chris@82 1431 T1z = T1j - T1y;
Chris@82 1432 T1Q = T1A + T1P;
Chris@82 1433 T1R = KP707106781 * (T1z + T1Q);
Chris@82 1434 T2p = KP707106781 * (T1z - T1Q);
Chris@82 1435 }
Chris@82 1436 {
Chris@82 1437 E T1S, T2e, T11, T1T;
Chris@82 1438 T1S = T1i - T1R;
Chris@82 1439 T2e = T2a - T2d;
Chris@82 1440 T11 = W[38];
Chris@82 1441 T1T = W[39];
Chris@82 1442 Rp[WS(rs, 10)] = FNMS(T1T, T2e, T11 * T1S);
Chris@82 1443 Rm[WS(rs, 10)] = FMA(T1T, T1S, T11 * T2e);
Chris@82 1444 }
Chris@82 1445 {
Chris@82 1446 E T2s, T2u, T2r, T2t;
Chris@82 1447 T2s = T2k + T2l;
Chris@82 1448 T2u = T2o + T2p;
Chris@82 1449 T2r = W[22];
Chris@82 1450 T2t = W[23];
Chris@82 1451 Rp[WS(rs, 6)] = FNMS(T2t, T2u, T2r * T2s);
Chris@82 1452 Rm[WS(rs, 6)] = FMA(T2r, T2u, T2t * T2s);
Chris@82 1453 }
Chris@82 1454 {
Chris@82 1455 E T2g, T2i, T2f, T2h;
Chris@82 1456 T2g = T1i + T1R;
Chris@82 1457 T2i = T2a + T2d;
Chris@82 1458 T2f = W[6];
Chris@82 1459 T2h = W[7];
Chris@82 1460 Rp[WS(rs, 2)] = FNMS(T2h, T2i, T2f * T2g);
Chris@82 1461 Rm[WS(rs, 2)] = FMA(T2h, T2g, T2f * T2i);
Chris@82 1462 }
Chris@82 1463 {
Chris@82 1464 E T2m, T2q, T2j, T2n;
Chris@82 1465 T2m = T2k - T2l;
Chris@82 1466 T2q = T2o - T2p;
Chris@82 1467 T2j = W[54];
Chris@82 1468 T2n = W[55];
Chris@82 1469 Rp[WS(rs, 14)] = FNMS(T2n, T2q, T2j * T2m);
Chris@82 1470 Rm[WS(rs, 14)] = FMA(T2j, T2q, T2n * T2m);
Chris@82 1471 }
Chris@82 1472 }
/* Rp/Rm elements 13, 1, 5, 9. */
Chris@82 1473 {
Chris@82 1474 E T3O, T4a, T40, T4e, T3V, T4f, T43, T4b, T3N, T3Z;
Chris@82 1475 T3N = KP707106781 * (T3n + T3o);
Chris@82 1476 T3O = T3M - T3N;
Chris@82 1477 T4a = T3M + T3N;
Chris@82 1478 T3Z = KP707106781 * (T30 + T2X);
Chris@82 1479 T40 = T3Y - T3Z;
Chris@82 1480 T4e = T3Y + T3Z;
Chris@82 1481 {
Chris@82 1482 E T3R, T3U, T41, T42;
Chris@82 1483 T3R = FNMS(KP382683432, T3Q, KP923879532 * T3P);
Chris@82 1484 T3U = FMA(KP923879532, T3S, KP382683432 * T3T);
Chris@82 1485 T3V = T3R - T3U;
Chris@82 1486 T4f = T3U + T3R;
Chris@82 1487 T41 = FNMS(KP382683432, T3S, KP923879532 * T3T);
Chris@82 1488 T42 = FMA(KP382683432, T3P, KP923879532 * T3Q);
Chris@82 1489 T43 = T41 - T42;
Chris@82 1490 T4b = T41 + T42;
Chris@82 1491 }
Chris@82 1492 {
Chris@82 1493 E T3W, T44, T3L, T3X;
Chris@82 1494 T3W = T3O - T3V;
Chris@82 1495 T44 = T40 - T43;
Chris@82 1496 T3L = W[50];
Chris@82 1497 T3X = W[51];
Chris@82 1498 Rp[WS(rs, 13)] = FNMS(T3X, T44, T3L * T3W);
Chris@82 1499 Rm[WS(rs, 13)] = FMA(T3X, T3W, T3L * T44);
Chris@82 1500 }
Chris@82 1501 {
Chris@82 1502 E T4i, T4k, T4h, T4j;
Chris@82 1503 T4i = T4a + T4b;
Chris@82 1504 T4k = T4e + T4f;
Chris@82 1505 T4h = W[2];
Chris@82 1506 T4j = W[3];
Chris@82 1507 Rp[WS(rs, 1)] = FNMS(T4j, T4k, T4h * T4i);
Chris@82 1508 Rm[WS(rs, 1)] = FMA(T4h, T4k, T4j * T4i);
Chris@82 1509 }
Chris@82 1510 {
Chris@82 1511 E T46, T48, T45, T47;
Chris@82 1512 T46 = T3O + T3V;
Chris@82 1513 T48 = T40 + T43;
Chris@82 1514 T45 = W[18];
Chris@82 1515 T47 = W[19];
Chris@82 1516 Rp[WS(rs, 5)] = FNMS(T47, T48, T45 * T46);
Chris@82 1517 Rm[WS(rs, 5)] = FMA(T47, T46, T45 * T48);
Chris@82 1518 }
Chris@82 1519 {
Chris@82 1520 E T4c, T4g, T49, T4d;
Chris@82 1521 T4c = T4a - T4b;
Chris@82 1522 T4g = T4e - T4f;
Chris@82 1523 T49 = W[34];
Chris@82 1524 T4d = W[35];
Chris@82 1525 Rp[WS(rs, 9)] = FNMS(T4d, T4g, T49 * T4c);
Chris@82 1526 Rm[WS(rs, 9)] = FMA(T49, T4g, T4d * T4c);
Chris@82 1527 }
Chris@82 1528 }
/* Rp/Rm elements 15, 3, 7, 11. */
Chris@82 1529 {
Chris@82 1530 E T32, T3A, T3q, T3E, T3h, T3F, T3t, T3B, T31, T3p;
Chris@82 1531 T31 = KP707106781 * (T2X - T30);
Chris@82 1532 T32 = T2U - T31;
Chris@82 1533 T3A = T2U + T31;
Chris@82 1534 T3p = KP707106781 * (T3n - T3o);
Chris@82 1535 T3q = T3m - T3p;
Chris@82 1536 T3E = T3m + T3p;
Chris@82 1537 {
Chris@82 1538 E T39, T3g, T3r, T3s;
Chris@82 1539 T39 = FNMS(KP923879532, T38, KP382683432 * T35);
Chris@82 1540 T3g = FMA(KP382683432, T3c, KP923879532 * T3f);
Chris@82 1541 T3h = T39 - T3g;
Chris@82 1542 T3F = T3g + T39;
Chris@82 1543 T3r = FNMS(KP923879532, T3c, KP382683432 * T3f);
Chris@82 1544 T3s = FMA(KP923879532, T35, KP382683432 * T38);
Chris@82 1545 T3t = T3r - T3s;
Chris@82 1546 T3B = T3r + T3s;
Chris@82 1547 }
Chris@82 1548 {
Chris@82 1549 E T3i, T3u, T2R, T3j;
Chris@82 1550 T3i = T32 - T3h;
Chris@82 1551 T3u = T3q - T3t;
Chris@82 1552 T2R = W[58];
Chris@82 1553 T3j = W[59];
Chris@82 1554 Rp[WS(rs, 15)] = FNMS(T3j, T3u, T2R * T3i);
Chris@82 1555 Rm[WS(rs, 15)] = FMA(T3j, T3i, T2R * T3u);
Chris@82 1556 }
Chris@82 1557 {
Chris@82 1558 E T3I, T3K, T3H, T3J;
Chris@82 1559 T3I = T3A + T3B;
Chris@82 1560 T3K = T3E + T3F;
Chris@82 1561 T3H = W[10];
Chris@82 1562 T3J = W[11];
Chris@82 1563 Rp[WS(rs, 3)] = FNMS(T3J, T3K, T3H * T3I);
Chris@82 1564 Rm[WS(rs, 3)] = FMA(T3H, T3K, T3J * T3I);
Chris@82 1565 }
Chris@82 1566 {
Chris@82 1567 E T3w, T3y, T3v, T3x;
Chris@82 1568 T3w = T32 + T3h;
Chris@82 1569 T3y = T3q + T3t;
Chris@82 1570 T3v = W[26];
Chris@82 1571 T3x = W[27];
Chris@82 1572 Rp[WS(rs, 7)] = FNMS(T3x, T3y, T3v * T3w);
Chris@82 1573 Rm[WS(rs, 7)] = FMA(T3x, T3w, T3v * T3y);
Chris@82 1574 }
Chris@82 1575 {
Chris@82 1576 E T3C, T3G, T3z, T3D;
Chris@82 1577 T3C = T3A - T3B;
Chris@82 1578 T3G = T3E - T3F;
Chris@82 1579 T3z = W[42];
Chris@82 1580 T3D = W[43];
Chris@82 1581 Rp[WS(rs, 11)] = FNMS(T3D, T3G, T3z * T3C);
Chris@82 1582 Rm[WS(rs, 11)] = FMA(T3z, T3G, T3D * T3C);
Chris@82 1583 }
Chris@82 1584 }
/* Store stage, part 2: Ip/Im outputs.  Each pair Ip[k], Im[k] (k = 0..15)
 * is combined with the two consecutive table entries W[4k] and W[4k + 1].
 * Elements 8, 4, 0, 12 first (pi/16 constants). */
Chris@82 1585 {
Chris@82 1586 E T60, T6m, T6f, T6n, T67, T6r, T6c, T6q;
Chris@82 1587 {
Chris@82 1588 E T5Y, T5Z, T6d, T6e;
Chris@82 1589 T5Y = T4o + T4v;
Chris@82 1590 T5Z = T5z + T5A;
Chris@82 1591 T60 = T5Y + T5Z;
Chris@82 1592 T6m = T5Y - T5Z;
Chris@82 1593 T6d = FMA(KP195090322, T61, KP980785280 * T62);
Chris@82 1594 T6e = FNMS(KP195090322, T64, KP980785280 * T65);
Chris@82 1595 T6f = T6d + T6e;
Chris@82 1596 T6n = T6e - T6d;
Chris@82 1597 }
Chris@82 1598 {
Chris@82 1599 E T63, T66, T6a, T6b;
Chris@82 1600 T63 = FNMS(KP195090322, T62, KP980785280 * T61);
Chris@82 1601 T66 = FMA(KP980785280, T64, KP195090322 * T65);
Chris@82 1602 T67 = T63 + T66;
Chris@82 1603 T6r = T63 - T66;
Chris@82 1604 T6a = T5u + T5x;
Chris@82 1605 T6b = T4K + T4D;
Chris@82 1606 T6c = T6a + T6b;
Chris@82 1607 T6q = T6a - T6b;
Chris@82 1608 }
Chris@82 1609 {
Chris@82 1610 E T68, T6g, T5X, T69;
Chris@82 1611 T68 = T60 - T67;
Chris@82 1612 T6g = T6c - T6f;
Chris@82 1613 T5X = W[32];
Chris@82 1614 T69 = W[33];
Chris@82 1615 Ip[WS(rs, 8)] = FNMS(T69, T6g, T5X * T68);
Chris@82 1616 Im[WS(rs, 8)] = FMA(T69, T68, T5X * T6g);
Chris@82 1617 }
Chris@82 1618 {
Chris@82 1619 E T6u, T6w, T6t, T6v;
Chris@82 1620 T6u = T6m + T6n;
Chris@82 1621 T6w = T6q + T6r;
Chris@82 1622 T6t = W[16];
Chris@82 1623 T6v = W[17];
Chris@82 1624 Ip[WS(rs, 4)] = FNMS(T6v, T6w, T6t * T6u);
Chris@82 1625 Im[WS(rs, 4)] = FMA(T6t, T6w, T6v * T6u);
Chris@82 1626 }
Chris@82 1627 {
Chris@82 1628 E T6i, T6k, T6h, T6j;
Chris@82 1629 T6i = T60 + T67;
Chris@82 1630 T6k = T6c + T6f;
Chris@82 1631 T6h = W[0];
Chris@82 1632 T6j = W[1];
Chris@82 1633 Ip[0] = FNMS(T6j, T6k, T6h * T6i);
Chris@82 1634 Im[0] = FMA(T6j, T6i, T6h * T6k);
Chris@82 1635 }
Chris@82 1636 {
Chris@82 1637 E T6o, T6s, T6l, T6p;
Chris@82 1638 T6o = T6m - T6n;
Chris@82 1639 T6s = T6q - T6r;
Chris@82 1640 T6l = W[48];
Chris@82 1641 T6p = W[49];
Chris@82 1642 Ip[WS(rs, 12)] = FNMS(T6p, T6s, T6l * T6o);
Chris@82 1643 Im[WS(rs, 12)] = FMA(T6l, T6s, T6p * T6o);
Chris@82 1644 }
Chris@82 1645 }
/* Ip/Im elements 11, 15, 3, 7 (pi/16 constants). */
Chris@82 1646 {
Chris@82 1647 E T7u, T7Q, T7J, T7R, T7B, T7V, T7G, T7U;
Chris@82 1648 {
Chris@82 1649 E T7s, T7t, T7H, T7I;
Chris@82 1650 T7s = T6y + T6z;
Chris@82 1651 T7t = T73 + T74;
Chris@82 1652 T7u = T7s - T7t;
Chris@82 1653 T7Q = T7s + T7t;
Chris@82 1654 T7H = FMA(KP195090322, T7w, KP980785280 * T7v);
Chris@82 1655 T7I = FMA(KP195090322, T7z, KP980785280 * T7y);
Chris@82 1656 T7J = T7H - T7I;
Chris@82 1657 T7R = T7H + T7I;
Chris@82 1658 }
Chris@82 1659 {
Chris@82 1660 E T7x, T7A, T7E, T7F;
Chris@82 1661 T7x = FNMS(KP980785280, T7w, KP195090322 * T7v);
Chris@82 1662 T7A = FNMS(KP980785280, T7z, KP195090322 * T7y);
Chris@82 1663 T7B = T7x + T7A;
Chris@82 1664 T7V = T7x - T7A;
Chris@82 1665 T7E = T70 - T71;
Chris@82 1666 T7F = T6D - T6G;
Chris@82 1667 T7G = T7E + T7F;
Chris@82 1668 T7U = T7E - T7F;
Chris@82 1669 }
Chris@82 1670 {
Chris@82 1671 E T7C, T7K, T7r, T7D;
Chris@82 1672 T7C = T7u - T7B;
Chris@82 1673 T7K = T7G - T7J;
Chris@82 1674 T7r = W[44];
Chris@82 1675 T7D = W[45];
Chris@82 1676 Ip[WS(rs, 11)] = FNMS(T7D, T7K, T7r * T7C);
Chris@82 1677 Im[WS(rs, 11)] = FMA(T7D, T7C, T7r * T7K);
Chris@82 1678 }
Chris@82 1679 {
Chris@82 1680 E T7Y, T80, T7X, T7Z;
Chris@82 1681 T7Y = T7Q + T7R;
Chris@82 1682 T80 = T7U - T7V;
Chris@82 1683 T7X = W[60];
Chris@82 1684 T7Z = W[61];
Chris@82 1685 Ip[WS(rs, 15)] = FNMS(T7Z, T80, T7X * T7Y);
Chris@82 1686 Im[WS(rs, 15)] = FMA(T7X, T80, T7Z * T7Y);
Chris@82 1687 }
Chris@82 1688 {
Chris@82 1689 E T7M, T7O, T7L, T7N;
Chris@82 1690 T7M = T7u + T7B;
Chris@82 1691 T7O = T7G + T7J;
Chris@82 1692 T7L = W[12];
Chris@82 1693 T7N = W[13];
Chris@82 1694 Ip[WS(rs, 3)] = FNMS(T7N, T7O, T7L * T7M);
Chris@82 1695 Im[WS(rs, 3)] = FMA(T7N, T7M, T7L * T7O);
Chris@82 1696 }
Chris@82 1697 {
Chris@82 1698 E T7S, T7W, T7P, T7T;
Chris@82 1699 T7S = T7Q - T7R;
Chris@82 1700 T7W = T7U + T7V;
Chris@82 1701 T7P = W[28];
Chris@82 1702 T7T = W[29];
Chris@82 1703 Ip[WS(rs, 7)] = FNMS(T7T, T7W, T7P * T7S);
Chris@82 1704 Im[WS(rs, 7)] = FMA(T7P, T7W, T7T * T7S);
Chris@82 1705 }
Chris@82 1706 }
/* Ip/Im elements 10, 6, 2, 14 (3*pi/16 constants). */
Chris@82 1707 {
Chris@82 1708 E T4M, T5M, T5F, T5N, T5p, T5R, T5C, T5Q;
Chris@82 1709 {
Chris@82 1710 E T4w, T4L, T5D, T5E;
Chris@82 1711 T4w = T4o - T4v;
Chris@82 1712 T4L = T4D - T4K;
Chris@82 1713 T4M = T4w + T4L;
Chris@82 1714 T5M = T4w - T4L;
Chris@82 1715 T5D = FMA(KP831469612, T4X, KP555570233 * T54);
Chris@82 1716 T5E = FNMS(KP831469612, T5g, KP555570233 * T5n);
Chris@82 1717 T5F = T5D + T5E;
Chris@82 1718 T5N = T5E - T5D;
Chris@82 1719 }
Chris@82 1720 {
Chris@82 1721 E T55, T5o, T5y, T5B;
Chris@82 1722 T55 = FNMS(KP831469612, T54, KP555570233 * T4X);
Chris@82 1723 T5o = FMA(KP555570233, T5g, KP831469612 * T5n);
Chris@82 1724 T5p = T55 + T5o;
Chris@82 1725 T5R = T55 - T5o;
Chris@82 1726 T5y = T5u - T5x;
Chris@82 1727 T5B = T5z - T5A;
Chris@82 1728 T5C = T5y + T5B;
Chris@82 1729 T5Q = T5y - T5B;
Chris@82 1730 }
Chris@82 1731 {
Chris@82 1732 E T5q, T5G, T4l, T5r;
Chris@82 1733 T5q = T4M - T5p;
Chris@82 1734 T5G = T5C - T5F;
Chris@82 1735 T4l = W[40];
Chris@82 1736 T5r = W[41];
Chris@82 1737 Ip[WS(rs, 10)] = FNMS(T5r, T5G, T4l * T5q);
Chris@82 1738 Im[WS(rs, 10)] = FMA(T5r, T5q, T4l * T5G);
Chris@82 1739 }
Chris@82 1740 {
Chris@82 1741 E T5U, T5W, T5T, T5V;
Chris@82 1742 T5U = T5M + T5N;
Chris@82 1743 T5W = T5Q + T5R;
Chris@82 1744 T5T = W[24];
Chris@82 1745 T5V = W[25];
Chris@82 1746 Ip[WS(rs, 6)] = FNMS(T5V, T5W, T5T * T5U);
Chris@82 1747 Im[WS(rs, 6)] = FMA(T5T, T5W, T5V * T5U);
Chris@82 1748 }
Chris@82 1749 {
Chris@82 1750 E T5I, T5K, T5H, T5J;
Chris@82 1751 T5I = T4M + T5p;
Chris@82 1752 T5K = T5C + T5F;
Chris@82 1753 T5H = W[8];
Chris@82 1754 T5J = W[9];
Chris@82 1755 Ip[WS(rs, 2)] = FNMS(T5J, T5K, T5H * T5I);
Chris@82 1756 Im[WS(rs, 2)] = FMA(T5J, T5I, T5H * T5K);
Chris@82 1757 }
Chris@82 1758 {
Chris@82 1759 E T5O, T5S, T5L, T5P;
Chris@82 1760 T5O = T5M - T5N;
Chris@82 1761 T5S = T5Q - T5R;
Chris@82 1762 T5L = W[56];
Chris@82 1763 T5P = W[57];
Chris@82 1764 Ip[WS(rs, 14)] = FNMS(T5P, T5S, T5L * T5O);
Chris@82 1765 Im[WS(rs, 14)] = FMA(T5L, T5S, T5P * T5O);
Chris@82 1766 }
Chris@82 1767 }
/* Ip/Im elements 13, 1, 5, 9 (3*pi/16 constants). */
Chris@82 1768 {
Chris@82 1769 E T6I, T7g, T79, T7h, T6X, T7l, T76, T7k;
Chris@82 1770 {
Chris@82 1771 E T6A, T6H, T77, T78;
Chris@82 1772 T6A = T6y - T6z;
Chris@82 1773 T6H = T6D + T6G;
Chris@82 1774 T6I = T6A - T6H;
Chris@82 1775 T7g = T6A + T6H;
Chris@82 1776 T77 = FNMS(KP555570233, T6S, KP831469612 * T6V);
Chris@82 1777 T78 = FMA(KP555570233, T6L, KP831469612 * T6O);
Chris@82 1778 T79 = T77 - T78;
Chris@82 1779 T7h = T77 + T78;
Chris@82 1780 }
Chris@82 1781 {
Chris@82 1782 E T6P, T6W, T72, T75;
Chris@82 1783 T6P = FNMS(KP555570233, T6O, KP831469612 * T6L);
Chris@82 1784 T6W = FMA(KP831469612, T6S, KP555570233 * T6V);
Chris@82 1785 T6X = T6P - T6W;
Chris@82 1786 T7l = T6W + T6P;
Chris@82 1787 T72 = T70 + T71;
Chris@82 1788 T75 = T73 - T74;
Chris@82 1789 T76 = T72 - T75;
Chris@82 1790 T7k = T72 + T75;
Chris@82 1791 }
Chris@82 1792 {
Chris@82 1793 E T6Y, T7a, T6x, T6Z;
Chris@82 1794 T6Y = T6I - T6X;
Chris@82 1795 T7a = T76 - T79;
Chris@82 1796 T6x = W[52];
Chris@82 1797 T6Z = W[53];
Chris@82 1798 Ip[WS(rs, 13)] = FNMS(T6Z, T7a, T6x * T6Y);
Chris@82 1799 Im[WS(rs, 13)] = FMA(T6Z, T6Y, T6x * T7a);
Chris@82 1800 }
Chris@82 1801 {
Chris@82 1802 E T7o, T7q, T7n, T7p;
Chris@82 1803 T7o = T7g + T7h;
Chris@82 1804 T7q = T7k + T7l;
Chris@82 1805 T7n = W[4];
Chris@82 1806 T7p = W[5];
Chris@82 1807 Ip[WS(rs, 1)] = FNMS(T7p, T7q, T7n * T7o);
Chris@82 1808 Im[WS(rs, 1)] = FMA(T7n, T7q, T7p * T7o);
Chris@82 1809 }
Chris@82 1810 {
Chris@82 1811 E T7c, T7e, T7b, T7d;
Chris@82 1812 T7c = T6I + T6X;
Chris@82 1813 T7e = T76 + T79;
Chris@82 1814 T7b = W[20];
Chris@82 1815 T7d = W[21];
Chris@82 1816 Ip[WS(rs, 5)] = FNMS(T7d, T7e, T7b * T7c);
Chris@82 1817 Im[WS(rs, 5)] = FMA(T7d, T7c, T7b * T7e);
Chris@82 1818 }
Chris@82 1819 {
Chris@82 1820 E T7i, T7m, T7f, T7j;
Chris@82 1821 T7i = T7g - T7h;
Chris@82 1822 T7m = T7k - T7l;
Chris@82 1823 T7f = W[36];
Chris@82 1824 T7j = W[37];
Chris@82 1825 Ip[WS(rs, 9)] = FNMS(T7j, T7m, T7f * T7i);
Chris@82 1826 Im[WS(rs, 9)] = FMA(T7f, T7m, T7j * T7i);
Chris@82 1827 }
Chris@82 1828 }
Chris@82 1829 }
Chris@82 1830 }
Chris@82 1831 }
Chris@82 1832
/* Twiddle-instruction table for the non-FMA build; it is handed to the
 * planner through `desc` below.  It holds a TW_FULL entry carrying the
 * radix 32, followed by a TW_NEXT entry.  The kernel above reads W[0]..W[61]
 * and advances W by 62 per iteration.
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for
 * radix 32 and TW_NEXT terminates the list -- confirm against the tw_instr
 * definition in the project headers. */
Chris@82 1833 static const tw_instr twinstr[] = {
Chris@82 1834 {TW_FULL, 1, 32},
Chris@82 1835 {TW_NEXT, 1, 0}
Chris@82 1836 };
Chris@82 1837
/* Codelet descriptor for the non-FMA build: radix 32, the name string
 * "hc2cb_32", the twiddle-instruction table, the GENUS object, and an
 * operation-count tuple.  The first three counts (340, 114, 94) are the
 * additions, multiplications and fused multiply/adds quoted in the generator
 * comment at the top of this branch.
 * NOTE(review): the meaning of the trailing 0 is not visible here -- check the
 * hc2c_desc / opcnt definition. */
Chris@82 1838 static const hc2c_desc desc = { 32, "hc2cb_32", twinstr, &GENUS, {340, 114, 94, 0} };
Chris@82 1839
/* Registration entry point (non-FMA build): passes the kernel function
 * hc2cb_32, its descriptor and the HC2C_VIA_RDFT flag to
 * X(khc2c_register) for planner `p`. */
Chris@82 1840 void X(codelet_hc2cb_32) (planner *p) {
Chris@82 1841 X(khc2c_register) (p, hc2cb_32, &desc, HC2C_VIA_RDFT);
Chris@82 1842 }
Chris@82 1843 #endif