annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cfdft2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:17 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 552 FP additions, 414 FP multiplications,
Chris@82 32 * (or, 300 additions, 162 multiplications, 252 fused multiply/add),
Chris@82 33 * 175 stack variables, 8 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
Chris@82 37 static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 40 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 41 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 47 {
Chris@82 48 INT m;
Chris@82 49 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@82 50 E T1, Th, T2, T5, Ti, Tl, T4, T6, T1a, Tc, T1c, Tk, Tz, T2H, T2v;
Chris@82 51 E T1u, Tm, Ts, T15, T2W, TZ, T2l, T2q, T2R, TR, TL, T3B, T3S, T3F, T3V;
Chris@82 52 E T4E, T4Y, T4I, T51, TF, T40, T44, T2A, T4M, T4Q, T1A, T3s, T3w, T2M, T4l;
Chris@82 53 E T4p, T1g, T1H, T1F, T1d, T1h, T1O, T1n, T1I, T28, T34, T32, T25, T29, T3b;
Chris@82 54 E T2f, T35;
Chris@82 55 {
Chris@82 56 E Tj, TY, TK, Tr, T14, TQ, T1b, T24, TE, T1z;
Chris@82 57 {
Chris@82 58 E T3, T1t, Tb, Ty;
Chris@82 59 T1 = W[0];
Chris@82 60 Th = W[4];
Chris@82 61 T2 = W[2];
Chris@82 62 T5 = W[3];
Chris@82 63 T3 = T1 * T2;
Chris@82 64 T1t = T2 * Th;
Chris@82 65 Tb = T1 * T5;
Chris@82 66 Ty = T1 * Th;
Chris@82 67 Ti = W[6];
Chris@82 68 Tj = Th * Ti;
Chris@82 69 TY = T2 * Ti;
Chris@82 70 TK = T1 * Ti;
Chris@82 71 Tl = W[7];
Chris@82 72 Tr = Th * Tl;
Chris@82 73 T14 = T2 * Tl;
Chris@82 74 TQ = T1 * Tl;
Chris@82 75 T4 = W[1];
Chris@82 76 T6 = FMA(T4, T5, T3);
Chris@82 77 T1a = FNMS(T4, T5, T3);
Chris@82 78 T1b = T1a * Th;
Chris@82 79 T24 = T6 * Th;
Chris@82 80 Tc = FNMS(T4, T2, Tb);
Chris@82 81 T1c = FMA(T4, T2, Tb);
Chris@82 82 Tk = W[5];
Chris@82 83 TE = T1 * Tk;
Chris@82 84 T1z = T2 * Tk;
Chris@82 85 Tz = FNMS(T4, Tk, Ty);
Chris@82 86 T2H = FMA(T4, Tk, Ty);
Chris@82 87 T2v = FNMS(T5, Tk, T1t);
Chris@82 88 T1u = FMA(T5, Tk, T1t);
Chris@82 89 }
Chris@82 90 Tm = FMA(Tk, Tl, Tj);
Chris@82 91 Ts = FNMS(Tk, Ti, Tr);
Chris@82 92 T15 = FMA(T5, Ti, T14);
Chris@82 93 T2W = FNMS(T5, Ti, T14);
Chris@82 94 TZ = FNMS(T5, Tl, TY);
Chris@82 95 T2l = FNMS(T4, Tl, TK);
Chris@82 96 T2q = FMA(T4, Ti, TQ);
Chris@82 97 T2R = FMA(T5, Tl, TY);
Chris@82 98 TR = FNMS(T4, Ti, TQ);
Chris@82 99 TL = FMA(T4, Tl, TK);
Chris@82 100 {
Chris@82 101 E T3A, T3E, T4k, T4o;
Chris@82 102 T3A = T6 * Ti;
Chris@82 103 T3B = FNMS(Tc, Tl, T3A);
Chris@82 104 T3S = FMA(Tc, Tl, T3A);
Chris@82 105 T3E = T6 * Tl;
Chris@82 106 T3F = FMA(Tc, Ti, T3E);
Chris@82 107 T3V = FNMS(Tc, Ti, T3E);
Chris@82 108 {
Chris@82 109 E T4D, T4H, T3Z, T43;
Chris@82 110 T4D = T1a * Ti;
Chris@82 111 T4E = FNMS(T1c, Tl, T4D);
Chris@82 112 T4Y = FMA(T1c, Tl, T4D);
Chris@82 113 T4H = T1a * Tl;
Chris@82 114 T4I = FMA(T1c, Ti, T4H);
Chris@82 115 T51 = FNMS(T1c, Ti, T4H);
Chris@82 116 T3Z = Tz * Ti;
Chris@82 117 T43 = Tz * Tl;
Chris@82 118 TF = FMA(T4, Th, TE);
Chris@82 119 T40 = FMA(TF, Tl, T3Z);
Chris@82 120 T44 = FNMS(TF, Ti, T43);
Chris@82 121 }
Chris@82 122 {
Chris@82 123 E T4L, T4P, T3r, T3v;
Chris@82 124 T4L = T2v * Ti;
Chris@82 125 T4P = T2v * Tl;
Chris@82 126 T2A = FMA(T5, Th, T1z);
Chris@82 127 T4M = FMA(T2A, Tl, T4L);
Chris@82 128 T4Q = FNMS(T2A, Ti, T4P);
Chris@82 129 T3r = T1u * Ti;
Chris@82 130 T3v = T1u * Tl;
Chris@82 131 T1A = FNMS(T5, Th, T1z);
Chris@82 132 T3s = FMA(T1A, Tl, T3r);
Chris@82 133 T3w = FNMS(T1A, Ti, T3v);
Chris@82 134 }
Chris@82 135 T4k = T2H * Ti;
Chris@82 136 T4o = T2H * Tl;
Chris@82 137 T2M = FNMS(T4, Th, TE);
Chris@82 138 T4l = FMA(T2M, Tl, T4k);
Chris@82 139 T4p = FNMS(T2M, Ti, T4o);
Chris@82 140 {
Chris@82 141 E T1G, T1N, T1e, T1m, T1f;
Chris@82 142 T1f = T1a * Tk;
Chris@82 143 T1g = FMA(T1c, Th, T1f);
Chris@82 144 T1H = FNMS(T1c, Th, T1f);
Chris@82 145 T1F = FMA(T1c, Tk, T1b);
Chris@82 146 T1G = T1F * Ti;
Chris@82 147 T1N = T1F * Tl;
Chris@82 148 T1d = FNMS(T1c, Tk, T1b);
Chris@82 149 T1e = T1d * Ti;
Chris@82 150 T1m = T1d * Tl;
Chris@82 151 T1h = FMA(T1g, Tl, T1e);
Chris@82 152 T1O = FNMS(T1H, Ti, T1N);
Chris@82 153 T1n = FNMS(T1g, Ti, T1m);
Chris@82 154 T1I = FMA(T1H, Tl, T1G);
Chris@82 155 }
Chris@82 156 {
Chris@82 157 E T33, T3a, T26, T2e, T27;
Chris@82 158 T27 = T6 * Tk;
Chris@82 159 T28 = FNMS(Tc, Th, T27);
Chris@82 160 T34 = FMA(Tc, Th, T27);
Chris@82 161 T32 = FNMS(Tc, Tk, T24);
Chris@82 162 T33 = T32 * Ti;
Chris@82 163 T3a = T32 * Tl;
Chris@82 164 T25 = FMA(Tc, Tk, T24);
Chris@82 165 T26 = T25 * Ti;
Chris@82 166 T2e = T25 * Tl;
Chris@82 167 T29 = FMA(T28, Tl, T26);
Chris@82 168 T3b = FNMS(T34, Ti, T3a);
Chris@82 169 T2f = FNMS(T28, Ti, T2e);
Chris@82 170 T35 = FMA(T34, Tl, T33);
Chris@82 171 }
Chris@82 172 }
Chris@82 173 }
Chris@82 174 {
Chris@82 175 E T3j, T7Z, T5b, T93, T4d, T8J, T6B, T8V, T1T, T8l, T6e, T8r, T54, T8C, T5O;
Chris@82 176 E T8i, T31, T94, T6w, T8K, T3Y, T8U, T5g, T80, T1s, T8h, T69, T8B, T4T, T8q;
Chris@82 177 E T5J, T8k, Tx, T8a, T5y, T8d, T4s, T8E, T5Y, T8v, T2k, T82, T5m, T83, T3z;
Chris@82 178 E T8X, T6l, T8O, T2F, T86, T5r, T85, T3M, T8Y, T6q, T8R, TW, T8e, T5D, T8b;
Chris@82 179 E T4B, T8F, T63, T8y;
Chris@82 180 {
Chris@82 181 E T3i, T4b, T38, T39, T45, T4a, T6z, T58, T3e, T42, T6x, T59, T3f, T5a;
Chris@82 182 {
Chris@82 183 E T3g, T3h, T36, T37;
Chris@82 184 T3g = Ip[0];
Chris@82 185 T3h = Im[0];
Chris@82 186 T3i = T3g - T3h;
Chris@82 187 T4b = T3g + T3h;
Chris@82 188 T36 = Ip[WS(rs, 8)];
Chris@82 189 T37 = Im[WS(rs, 8)];
Chris@82 190 T38 = T36 - T37;
Chris@82 191 T39 = T35 * T38;
Chris@82 192 T45 = T36 + T37;
Chris@82 193 }
Chris@82 194 {
Chris@82 195 E T47, T48, T49, T41, T3c, T3d;
Chris@82 196 T47 = Rm[0];
Chris@82 197 T48 = Rp[0];
Chris@82 198 T49 = T47 - T48;
Chris@82 199 T4a = T1 * T49;
Chris@82 200 T6z = T4 * T49;
Chris@82 201 T58 = T48 + T47;
Chris@82 202 T3c = Rp[WS(rs, 8)];
Chris@82 203 T3d = Rm[WS(rs, 8)];
Chris@82 204 T3e = T3c + T3d;
Chris@82 205 T41 = T3d - T3c;
Chris@82 206 T42 = T40 * T41;
Chris@82 207 T6x = T44 * T41;
Chris@82 208 T59 = T35 * T3e;
Chris@82 209 }
Chris@82 210 T3f = FNMS(T3b, T3e, T39);
Chris@82 211 T3j = T3f + T3i;
Chris@82 212 T7Z = T3i - T3f;
Chris@82 213 T5a = FMA(T3b, T38, T59);
Chris@82 214 T5b = T58 + T5a;
Chris@82 215 T93 = T58 - T5a;
Chris@82 216 {
Chris@82 217 E T46, T4c, T6y, T6A;
Chris@82 218 T46 = FNMS(T44, T45, T42);
Chris@82 219 T4c = FNMS(T4, T4b, T4a);
Chris@82 220 T4d = T46 + T4c;
Chris@82 221 T8J = T4c - T46;
Chris@82 222 T6y = FMA(T40, T45, T6x);
Chris@82 223 T6A = FMA(T1, T4b, T6z);
Chris@82 224 T6B = T6y + T6A;
Chris@82 225 T8V = T6A - T6y;
Chris@82 226 }
Chris@82 227 }
Chris@82 228 {
Chris@82 229 E T1x, T4W, T1y, T6a, T1D, T4U, T4V, T5K, T1L, T52, T1M, T6c, T1R, T4Z, T50;
Chris@82 230 E T5M;
Chris@82 231 {
Chris@82 232 E T1v, T1w, T1B, T1C;
Chris@82 233 T1v = Ip[WS(rs, 3)];
Chris@82 234 T1w = Im[WS(rs, 3)];
Chris@82 235 T1x = T1v - T1w;
Chris@82 236 T4W = T1v + T1w;
Chris@82 237 T1y = T1u * T1x;
Chris@82 238 T6a = T25 * T4W;
Chris@82 239 T1B = Rp[WS(rs, 3)];
Chris@82 240 T1C = Rm[WS(rs, 3)];
Chris@82 241 T1D = T1B + T1C;
Chris@82 242 T4U = T1B - T1C;
Chris@82 243 T4V = T25 * T4U;
Chris@82 244 T5K = T1u * T1D;
Chris@82 245 }
Chris@82 246 {
Chris@82 247 E T1J, T1K, T1P, T1Q;
Chris@82 248 T1J = Ip[WS(rs, 11)];
Chris@82 249 T1K = Im[WS(rs, 11)];
Chris@82 250 T1L = T1J - T1K;
Chris@82 251 T52 = T1J + T1K;
Chris@82 252 T1M = T1I * T1L;
Chris@82 253 T6c = T4Y * T52;
Chris@82 254 T1P = Rp[WS(rs, 11)];
Chris@82 255 T1Q = Rm[WS(rs, 11)];
Chris@82 256 T1R = T1P + T1Q;
Chris@82 257 T4Z = T1P - T1Q;
Chris@82 258 T50 = T4Y * T4Z;
Chris@82 259 T5M = T1I * T1R;
Chris@82 260 }
Chris@82 261 {
Chris@82 262 E T1E, T1S, T6b, T6d;
Chris@82 263 T1E = FNMS(T1A, T1D, T1y);
Chris@82 264 T1S = FNMS(T1O, T1R, T1M);
Chris@82 265 T1T = T1E + T1S;
Chris@82 266 T8l = T1E - T1S;
Chris@82 267 T6b = FNMS(T28, T4U, T6a);
Chris@82 268 T6d = FNMS(T51, T4Z, T6c);
Chris@82 269 T6e = T6b + T6d;
Chris@82 270 T8r = T6d - T6b;
Chris@82 271 }
Chris@82 272 {
Chris@82 273 E T4X, T53, T5L, T5N;
Chris@82 274 T4X = FMA(T28, T4W, T4V);
Chris@82 275 T53 = FMA(T51, T52, T50);
Chris@82 276 T54 = T4X + T53;
Chris@82 277 T8C = T53 - T4X;
Chris@82 278 T5L = FMA(T1A, T1x, T5K);
Chris@82 279 T5N = FMA(T1O, T1L, T5M);
Chris@82 280 T5O = T5L + T5N;
Chris@82 281 T8i = T5L - T5N;
Chris@82 282 }
Chris@82 283 }
Chris@82 284 {
Chris@82 285 E T2K, T2L, T3Q, T2P, T3P, T6s, T5c, T2U, T2V, T3W, T2Z, T3U, T6u, T5e;
Chris@82 286 {
Chris@82 287 E T2I, T2J, T3O, T2N, T2O;
Chris@82 288 T2I = Ip[WS(rs, 4)];
Chris@82 289 T2J = Im[WS(rs, 4)];
Chris@82 290 T2K = T2I - T2J;
Chris@82 291 T2L = T2H * T2K;
Chris@82 292 T3Q = T2I + T2J;
Chris@82 293 T2N = Rp[WS(rs, 4)];
Chris@82 294 T2O = Rm[WS(rs, 4)];
Chris@82 295 T2P = T2N + T2O;
Chris@82 296 T3O = T2O - T2N;
Chris@82 297 T3P = Th * T3O;
Chris@82 298 T6s = Tk * T3O;
Chris@82 299 T5c = T2H * T2P;
Chris@82 300 }
Chris@82 301 {
Chris@82 302 E T2S, T2T, T3T, T2X, T2Y;
Chris@82 303 T2S = Ip[WS(rs, 12)];
Chris@82 304 T2T = Im[WS(rs, 12)];
Chris@82 305 T2U = T2S - T2T;
Chris@82 306 T2V = T2R * T2U;
Chris@82 307 T3W = T2S + T2T;
Chris@82 308 T2X = Rp[WS(rs, 12)];
Chris@82 309 T2Y = Rm[WS(rs, 12)];
Chris@82 310 T2Z = T2X + T2Y;
Chris@82 311 T3T = T2Y - T2X;
Chris@82 312 T3U = T3S * T3T;
Chris@82 313 T6u = T3V * T3T;
Chris@82 314 T5e = T2R * T2Z;
Chris@82 315 }
Chris@82 316 {
Chris@82 317 E T2Q, T30, T6t, T6v;
Chris@82 318 T2Q = FNMS(T2M, T2P, T2L);
Chris@82 319 T30 = FNMS(T2W, T2Z, T2V);
Chris@82 320 T31 = T2Q + T30;
Chris@82 321 T94 = T2Q - T30;
Chris@82 322 T6t = FMA(Th, T3Q, T6s);
Chris@82 323 T6v = FMA(T3S, T3W, T6u);
Chris@82 324 T6w = T6t + T6v;
Chris@82 325 T8K = T6t - T6v;
Chris@82 326 }
Chris@82 327 {
Chris@82 328 E T3R, T3X, T5d, T5f;
Chris@82 329 T3R = FNMS(Tk, T3Q, T3P);
Chris@82 330 T3X = FNMS(T3V, T3W, T3U);
Chris@82 331 T3Y = T3R + T3X;
Chris@82 332 T8U = T3R - T3X;
Chris@82 333 T5d = FMA(T2M, T2K, T5c);
Chris@82 334 T5f = FMA(T2W, T2U, T5e);
Chris@82 335 T5g = T5d + T5f;
Chris@82 336 T80 = T5d - T5f;
Chris@82 337 }
Chris@82 338 }
Chris@82 339 {
Chris@82 340 E T12, T4J, T13, T65, T18, T4F, T4G, T5F, T1k, T4R, T1l, T67, T1q, T4N, T4O;
Chris@82 341 E T5H;
Chris@82 342 {
Chris@82 343 E T10, T11, T16, T17;
Chris@82 344 T10 = Ip[WS(rs, 15)];
Chris@82 345 T11 = Im[WS(rs, 15)];
Chris@82 346 T12 = T10 - T11;
Chris@82 347 T4J = T10 + T11;
Chris@82 348 T13 = TZ * T12;
Chris@82 349 T65 = T4E * T4J;
Chris@82 350 T16 = Rp[WS(rs, 15)];
Chris@82 351 T17 = Rm[WS(rs, 15)];
Chris@82 352 T18 = T16 + T17;
Chris@82 353 T4F = T16 - T17;
Chris@82 354 T4G = T4E * T4F;
Chris@82 355 T5F = TZ * T18;
Chris@82 356 }
Chris@82 357 {
Chris@82 358 E T1i, T1j, T1o, T1p;
Chris@82 359 T1i = Ip[WS(rs, 7)];
Chris@82 360 T1j = Im[WS(rs, 7)];
Chris@82 361 T1k = T1i - T1j;
Chris@82 362 T4R = T1i + T1j;
Chris@82 363 T1l = T1h * T1k;
Chris@82 364 T67 = T4M * T4R;
Chris@82 365 T1o = Rp[WS(rs, 7)];
Chris@82 366 T1p = Rm[WS(rs, 7)];
Chris@82 367 T1q = T1o + T1p;
Chris@82 368 T4N = T1o - T1p;
Chris@82 369 T4O = T4M * T4N;
Chris@82 370 T5H = T1h * T1q;
Chris@82 371 }
Chris@82 372 {
Chris@82 373 E T19, T1r, T66, T68;
Chris@82 374 T19 = FNMS(T15, T18, T13);
Chris@82 375 T1r = FNMS(T1n, T1q, T1l);
Chris@82 376 T1s = T19 + T1r;
Chris@82 377 T8h = T19 - T1r;
Chris@82 378 T66 = FNMS(T4I, T4F, T65);
Chris@82 379 T68 = FNMS(T4Q, T4N, T67);
Chris@82 380 T69 = T66 + T68;
Chris@82 381 T8B = T66 - T68;
Chris@82 382 }
Chris@82 383 {
Chris@82 384 E T4K, T4S, T5G, T5I;
Chris@82 385 T4K = FMA(T4I, T4J, T4G);
Chris@82 386 T4S = FMA(T4Q, T4R, T4O);
Chris@82 387 T4T = T4K + T4S;
Chris@82 388 T8q = T4S - T4K;
Chris@82 389 T5G = FMA(T15, T12, T5F);
Chris@82 390 T5I = FMA(T1n, T1k, T5H);
Chris@82 391 T5J = T5G + T5I;
Chris@82 392 T8k = T5G - T5I;
Chris@82 393 }
Chris@82 394 }
Chris@82 395 {
Chris@82 396 E T9, T4i, Ta, T5U, Tf, T4g, T4h, T5u, Tp, T4q, Tq, T5W, Tv, T4m, T4n;
Chris@82 397 E T5w;
Chris@82 398 {
Chris@82 399 E T7, T8, Td, Te;
Chris@82 400 T7 = Ip[WS(rs, 1)];
Chris@82 401 T8 = Im[WS(rs, 1)];
Chris@82 402 T9 = T7 - T8;
Chris@82 403 T4i = T7 + T8;
Chris@82 404 Ta = T6 * T9;
Chris@82 405 T5U = T2 * T4i;
Chris@82 406 Td = Rp[WS(rs, 1)];
Chris@82 407 Te = Rm[WS(rs, 1)];
Chris@82 408 Tf = Td + Te;
Chris@82 409 T4g = Td - Te;
Chris@82 410 T4h = T2 * T4g;
Chris@82 411 T5u = T6 * Tf;
Chris@82 412 }
Chris@82 413 {
Chris@82 414 E Tn, To, Tt, Tu;
Chris@82 415 Tn = Ip[WS(rs, 9)];
Chris@82 416 To = Im[WS(rs, 9)];
Chris@82 417 Tp = Tn - To;
Chris@82 418 T4q = Tn + To;
Chris@82 419 Tq = Tm * Tp;
Chris@82 420 T5W = T4l * T4q;
Chris@82 421 Tt = Rp[WS(rs, 9)];
Chris@82 422 Tu = Rm[WS(rs, 9)];
Chris@82 423 Tv = Tt + Tu;
Chris@82 424 T4m = Tt - Tu;
Chris@82 425 T4n = T4l * T4m;
Chris@82 426 T5w = Tm * Tv;
Chris@82 427 }
Chris@82 428 {
Chris@82 429 E Tg, Tw, T5v, T5x;
Chris@82 430 Tg = FNMS(Tc, Tf, Ta);
Chris@82 431 Tw = FNMS(Ts, Tv, Tq);
Chris@82 432 Tx = Tg + Tw;
Chris@82 433 T8a = Tg - Tw;
Chris@82 434 T5v = FMA(Tc, T9, T5u);
Chris@82 435 T5x = FMA(Ts, Tp, T5w);
Chris@82 436 T5y = T5v + T5x;
Chris@82 437 T8d = T5v - T5x;
Chris@82 438 {
Chris@82 439 E T4j, T4r, T8t, T5V, T5X, T8u;
Chris@82 440 T4j = FMA(T5, T4i, T4h);
Chris@82 441 T4r = FMA(T4p, T4q, T4n);
Chris@82 442 T8t = T4r - T4j;
Chris@82 443 T5V = FNMS(T5, T4g, T5U);
Chris@82 444 T5X = FNMS(T4p, T4m, T5W);
Chris@82 445 T8u = T5V - T5X;
Chris@82 446 T4s = T4j + T4r;
Chris@82 447 T8E = T8u + T8t;
Chris@82 448 T5Y = T5V + T5X;
Chris@82 449 T8v = T8t - T8u;
Chris@82 450 }
Chris@82 451 }
Chris@82 452 }
Chris@82 453 {
Chris@82 454 E T1Y, T1Z, T3p, T22, T3o, T6h, T5i, T2c, T2d, T3x, T2i, T3u, T6j, T5k;
Chris@82 455 {
Chris@82 456 E T1W, T1X, T3n, T20, T21;
Chris@82 457 T1W = Ip[WS(rs, 2)];
Chris@82 458 T1X = Im[WS(rs, 2)];
Chris@82 459 T1Y = T1W - T1X;
Chris@82 460 T1Z = T1a * T1Y;
Chris@82 461 T3p = T1W + T1X;
Chris@82 462 T20 = Rp[WS(rs, 2)];
Chris@82 463 T21 = Rm[WS(rs, 2)];
Chris@82 464 T22 = T20 + T21;
Chris@82 465 T3n = T21 - T20;
Chris@82 466 T3o = T1F * T3n;
Chris@82 467 T6h = T1H * T3n;
Chris@82 468 T5i = T1a * T22;
Chris@82 469 }
Chris@82 470 {
Chris@82 471 E T2a, T2b, T3t, T2g, T2h;
Chris@82 472 T2a = Ip[WS(rs, 10)];
Chris@82 473 T2b = Im[WS(rs, 10)];
Chris@82 474 T2c = T2a - T2b;
Chris@82 475 T2d = T29 * T2c;
Chris@82 476 T3x = T2a + T2b;
Chris@82 477 T2g = Rp[WS(rs, 10)];
Chris@82 478 T2h = Rm[WS(rs, 10)];
Chris@82 479 T2i = T2g + T2h;
Chris@82 480 T3t = T2h - T2g;
Chris@82 481 T3u = T3s * T3t;
Chris@82 482 T6j = T3w * T3t;
Chris@82 483 T5k = T29 * T2i;
Chris@82 484 }
Chris@82 485 {
Chris@82 486 E T23, T2j, T5j, T5l;
Chris@82 487 T23 = FNMS(T1c, T22, T1Z);
Chris@82 488 T2j = FNMS(T2f, T2i, T2d);
Chris@82 489 T2k = T23 + T2j;
Chris@82 490 T82 = T23 - T2j;
Chris@82 491 T5j = FMA(T1c, T1Y, T5i);
Chris@82 492 T5l = FMA(T2f, T2c, T5k);
Chris@82 493 T5m = T5j + T5l;
Chris@82 494 T83 = T5j - T5l;
Chris@82 495 {
Chris@82 496 E T3q, T3y, T8M, T6i, T6k, T8N;
Chris@82 497 T3q = FNMS(T1H, T3p, T3o);
Chris@82 498 T3y = FNMS(T3w, T3x, T3u);
Chris@82 499 T8M = T3q - T3y;
Chris@82 500 T6i = FMA(T1F, T3p, T6h);
Chris@82 501 T6k = FMA(T3s, T3x, T6j);
Chris@82 502 T8N = T6i - T6k;
Chris@82 503 T3z = T3q + T3y;
Chris@82 504 T8X = T8M + T8N;
Chris@82 505 T6l = T6i + T6k;
Chris@82 506 T8O = T8M - T8N;
Chris@82 507 }
Chris@82 508 }
Chris@82 509 }
Chris@82 510 {
Chris@82 511 E T2o, T2p, T3G, T2t, T3D, T6m, T5n, T2y, T2z, T3K, T2D, T3J, T6o, T5p;
Chris@82 512 {
Chris@82 513 E T2m, T2n, T3C, T2r, T2s;
Chris@82 514 T2m = Ip[WS(rs, 14)];
Chris@82 515 T2n = Im[WS(rs, 14)];
Chris@82 516 T2o = T2m - T2n;
Chris@82 517 T2p = T2l * T2o;
Chris@82 518 T3G = T2m + T2n;
Chris@82 519 T2r = Rp[WS(rs, 14)];
Chris@82 520 T2s = Rm[WS(rs, 14)];
Chris@82 521 T2t = T2r + T2s;
Chris@82 522 T3C = T2s - T2r;
Chris@82 523 T3D = T3B * T3C;
Chris@82 524 T6m = T3F * T3C;
Chris@82 525 T5n = T2l * T2t;
Chris@82 526 }
Chris@82 527 {
Chris@82 528 E T2w, T2x, T3I, T2B, T2C;
Chris@82 529 T2w = Ip[WS(rs, 6)];
Chris@82 530 T2x = Im[WS(rs, 6)];
Chris@82 531 T2y = T2w - T2x;
Chris@82 532 T2z = T2v * T2y;
Chris@82 533 T3K = T2w + T2x;
Chris@82 534 T2B = Rp[WS(rs, 6)];
Chris@82 535 T2C = Rm[WS(rs, 6)];
Chris@82 536 T2D = T2B + T2C;
Chris@82 537 T3I = T2C - T2B;
Chris@82 538 T3J = T1d * T3I;
Chris@82 539 T6o = T1g * T3I;
Chris@82 540 T5p = T2v * T2D;
Chris@82 541 }
Chris@82 542 {
Chris@82 543 E T2u, T2E, T5o, T5q;
Chris@82 544 T2u = FNMS(T2q, T2t, T2p);
Chris@82 545 T2E = FNMS(T2A, T2D, T2z);
Chris@82 546 T2F = T2u + T2E;
Chris@82 547 T86 = T2u - T2E;
Chris@82 548 T5o = FMA(T2q, T2o, T5n);
Chris@82 549 T5q = FMA(T2A, T2y, T5p);
Chris@82 550 T5r = T5o + T5q;
Chris@82 551 T85 = T5o - T5q;
Chris@82 552 {
Chris@82 553 E T3H, T3L, T8P, T6n, T6p, T8Q;
Chris@82 554 T3H = FNMS(T3F, T3G, T3D);
Chris@82 555 T3L = FNMS(T1g, T3K, T3J);
Chris@82 556 T8P = T3H - T3L;
Chris@82 557 T6n = FMA(T3B, T3G, T6m);
Chris@82 558 T6p = FMA(T1d, T3K, T6o);
Chris@82 559 T8Q = T6n - T6p;
Chris@82 560 T3M = T3H + T3L;
Chris@82 561 T8Y = T8Q - T8P;
Chris@82 562 T6q = T6n + T6p;
Chris@82 563 T8R = T8P + T8Q;
Chris@82 564 }
Chris@82 565 }
Chris@82 566 }
Chris@82 567 {
Chris@82 568 E TC, T4v, TD, T5Z, TI, T4t, T4u, T5z, TO, T4z, TP, T61, TU, T4x, T4y;
Chris@82 569 E T5B;
Chris@82 570 {
Chris@82 571 E TA, TB, TG, TH;
Chris@82 572 TA = Ip[WS(rs, 5)];
Chris@82 573 TB = Im[WS(rs, 5)];
Chris@82 574 TC = TA - TB;
Chris@82 575 T4v = TA + TB;
Chris@82 576 TD = Tz * TC;
Chris@82 577 T5Z = T32 * T4v;
Chris@82 578 TG = Rp[WS(rs, 5)];
Chris@82 579 TH = Rm[WS(rs, 5)];
Chris@82 580 TI = TG + TH;
Chris@82 581 T4t = TG - TH;
Chris@82 582 T4u = T32 * T4t;
Chris@82 583 T5z = Tz * TI;
Chris@82 584 }
Chris@82 585 {
Chris@82 586 E TM, TN, TS, TT;
Chris@82 587 TM = Ip[WS(rs, 13)];
Chris@82 588 TN = Im[WS(rs, 13)];
Chris@82 589 TO = TM - TN;
Chris@82 590 T4z = TM + TN;
Chris@82 591 TP = TL * TO;
Chris@82 592 T61 = Ti * T4z;
Chris@82 593 TS = Rp[WS(rs, 13)];
Chris@82 594 TT = Rm[WS(rs, 13)];
Chris@82 595 TU = TS + TT;
Chris@82 596 T4x = TS - TT;
Chris@82 597 T4y = Ti * T4x;
Chris@82 598 T5B = TL * TU;
Chris@82 599 }
Chris@82 600 {
Chris@82 601 E TJ, TV, T5A, T5C;
Chris@82 602 TJ = FNMS(TF, TI, TD);
Chris@82 603 TV = FNMS(TR, TU, TP);
Chris@82 604 TW = TJ + TV;
Chris@82 605 T8e = TJ - TV;
Chris@82 606 T5A = FMA(TF, TC, T5z);
Chris@82 607 T5C = FMA(TR, TO, T5B);
Chris@82 608 T5D = T5A + T5C;
Chris@82 609 T8b = T5A - T5C;
Chris@82 610 {
Chris@82 611 E T4w, T4A, T8x, T60, T62, T8w;
Chris@82 612 T4w = FMA(T34, T4v, T4u);
Chris@82 613 T4A = FMA(Tl, T4z, T4y);
Chris@82 614 T8x = T4w - T4A;
Chris@82 615 T60 = FNMS(T34, T4t, T5Z);
Chris@82 616 T62 = FNMS(Tl, T4x, T61);
Chris@82 617 T8w = T62 - T60;
Chris@82 618 T4B = T4w + T4A;
Chris@82 619 T8F = T8w - T8x;
Chris@82 620 T63 = T60 + T62;
Chris@82 621 T8y = T8w + T8x;
Chris@82 622 }
Chris@82 623 }
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T1V, T6S, T3l, T6I, T5Q, T6H, T5t, T6R, T56, T6W, T6g, T6M, T4f, T6X, T6D;
Chris@82 627 E T6P;
Chris@82 628 {
Chris@82 629 E TX, T1U, T5h, T5s;
Chris@82 630 TX = Tx + TW;
Chris@82 631 T1U = T1s + T1T;
Chris@82 632 T1V = TX + T1U;
Chris@82 633 T6S = TX - T1U;
Chris@82 634 {
Chris@82 635 E T2G, T3k, T5E, T5P;
Chris@82 636 T2G = T2k + T2F;
Chris@82 637 T3k = T31 + T3j;
Chris@82 638 T3l = T2G + T3k;
Chris@82 639 T6I = T3k - T2G;
Chris@82 640 T5E = T5y + T5D;
Chris@82 641 T5P = T5J + T5O;
Chris@82 642 T5Q = T5E + T5P;
Chris@82 643 T6H = T5P - T5E;
Chris@82 644 }
Chris@82 645 T5h = T5b + T5g;
Chris@82 646 T5s = T5m + T5r;
Chris@82 647 T5t = T5h + T5s;
Chris@82 648 T6R = T5h - T5s;
Chris@82 649 {
Chris@82 650 E T4C, T55, T6L, T64, T6f, T6K;
Chris@82 651 T4C = T4s + T4B;
Chris@82 652 T55 = T4T + T54;
Chris@82 653 T6L = T4C - T55;
Chris@82 654 T64 = T5Y + T63;
Chris@82 655 T6f = T69 + T6e;
Chris@82 656 T6K = T6f - T64;
Chris@82 657 T56 = T4C + T55;
Chris@82 658 T6W = T6K - T6L;
Chris@82 659 T6g = T64 + T6f;
Chris@82 660 T6M = T6K + T6L;
Chris@82 661 }
Chris@82 662 {
Chris@82 663 E T3N, T4e, T6N, T6r, T6C, T6O;
Chris@82 664 T3N = T3z + T3M;
Chris@82 665 T4e = T3Y + T4d;
Chris@82 666 T6N = T4e - T3N;
Chris@82 667 T6r = T6l + T6q;
Chris@82 668 T6C = T6w + T6B;
Chris@82 669 T6O = T6C - T6r;
Chris@82 670 T4f = T3N + T4e;
Chris@82 671 T6X = T6N + T6O;
Chris@82 672 T6D = T6r + T6C;
Chris@82 673 T6P = T6N - T6O;
Chris@82 674 }
Chris@82 675 }
Chris@82 676 {
Chris@82 677 E T3m, T57, T6F, T6G;
Chris@82 678 T3m = T1V + T3l;
Chris@82 679 T57 = T4f - T56;
Chris@82 680 Ip[0] = KP500000000 * (T3m + T57);
Chris@82 681 Im[WS(rs, 15)] = KP500000000 * (T57 - T3m);
Chris@82 682 T6F = T5t + T5Q;
Chris@82 683 T6G = T6g + T6D;
Chris@82 684 Rm[WS(rs, 15)] = KP500000000 * (T6F - T6G);
Chris@82 685 Rp[0] = KP500000000 * (T6F + T6G);
Chris@82 686 }
Chris@82 687 {
Chris@82 688 E T5R, T5S, T5T, T6E;
Chris@82 689 T5R = T5t - T5Q;
Chris@82 690 T5S = T56 + T4f;
Chris@82 691 Rm[WS(rs, 7)] = KP500000000 * (T5R - T5S);
Chris@82 692 Rp[WS(rs, 8)] = KP500000000 * (T5R + T5S);
Chris@82 693 T5T = T3l - T1V;
Chris@82 694 T6E = T6g - T6D;
Chris@82 695 Ip[WS(rs, 8)] = KP500000000 * (T5T + T6E);
Chris@82 696 Im[WS(rs, 7)] = KP500000000 * (T6E - T5T);
Chris@82 697 }
Chris@82 698 {
Chris@82 699 E T6J, T6Q, T6Z, T70;
Chris@82 700 T6J = T6H + T6I;
Chris@82 701 T6Q = T6M + T6P;
Chris@82 702 Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6Q, T6J));
Chris@82 703 Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6Q, T6J)));
Chris@82 704 T6Z = T6R + T6S;
Chris@82 705 T70 = T6W + T6X;
Chris@82 706 Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T70, T6Z));
Chris@82 707 Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T70, T6Z));
Chris@82 708 }
Chris@82 709 {
Chris@82 710 E T6T, T6U, T6V, T6Y;
Chris@82 711 T6T = T6R - T6S;
Chris@82 712 T6U = T6P - T6M;
Chris@82 713 Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6U, T6T));
Chris@82 714 Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6U, T6T));
Chris@82 715 T6V = T6I - T6H;
Chris@82 716 T6Y = T6W - T6X;
Chris@82 717 Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6Y, T6V));
Chris@82 718 Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6Y, T6V)));
Chris@82 719 }
Chris@82 720 }
Chris@82 721 {
Chris@82 722 E T73, T7F, T7t, T7P, T7a, T7Q, T7w, T7G, T7i, T7U, T7A, T7K, T7p, T7V, T7B;
Chris@82 723 E T7N;
Chris@82 724 {
Chris@82 725 E T71, T72, T7r, T7s;
Chris@82 726 T71 = T5r - T5m;
Chris@82 727 T72 = T3j - T31;
Chris@82 728 T73 = T71 + T72;
Chris@82 729 T7F = T72 - T71;
Chris@82 730 T7r = T5b - T5g;
Chris@82 731 T7s = T2k - T2F;
Chris@82 732 T7t = T7r + T7s;
Chris@82 733 T7P = T7r - T7s;
Chris@82 734 }
Chris@82 735 {
Chris@82 736 E T76, T7u, T79, T7v;
Chris@82 737 {
Chris@82 738 E T74, T75, T77, T78;
Chris@82 739 T74 = Tx - TW;
Chris@82 740 T75 = T5y - T5D;
Chris@82 741 T76 = T74 - T75;
Chris@82 742 T7u = T75 + T74;
Chris@82 743 T77 = T5J - T5O;
Chris@82 744 T78 = T1s - T1T;
Chris@82 745 T79 = T77 + T78;
Chris@82 746 T7v = T77 - T78;
Chris@82 747 }
Chris@82 748 T7a = T76 + T79;
Chris@82 749 T7Q = T76 - T79;
Chris@82 750 T7w = T7u + T7v;
Chris@82 751 T7G = T7v - T7u;
Chris@82 752 }
Chris@82 753 {
Chris@82 754 E T7e, T7I, T7h, T7J;
Chris@82 755 {
Chris@82 756 E T7c, T7d, T7f, T7g;
Chris@82 757 T7c = T63 - T5Y;
Chris@82 758 T7d = T54 - T4T;
Chris@82 759 T7e = T7c + T7d;
Chris@82 760 T7I = T7c - T7d;
Chris@82 761 T7f = T4B - T4s;
Chris@82 762 T7g = T69 - T6e;
Chris@82 763 T7h = T7f + T7g;
Chris@82 764 T7J = T7g - T7f;
Chris@82 765 }
Chris@82 766 T7i = FMA(KP414213562, T7h, T7e);
Chris@82 767 T7U = FNMS(KP414213562, T7I, T7J);
Chris@82 768 T7A = FNMS(KP414213562, T7e, T7h);
Chris@82 769 T7K = FMA(KP414213562, T7J, T7I);
Chris@82 770 }
Chris@82 771 {
Chris@82 772 E T7l, T7L, T7o, T7M;
Chris@82 773 {
Chris@82 774 E T7j, T7k, T7m, T7n;
Chris@82 775 T7j = T6q - T6l;
Chris@82 776 T7k = T4d - T3Y;
Chris@82 777 T7l = T7j + T7k;
Chris@82 778 T7L = T7k - T7j;
Chris@82 779 T7m = T3z - T3M;
Chris@82 780 T7n = T6B - T6w;
Chris@82 781 T7o = T7m + T7n;
Chris@82 782 T7M = T7n - T7m;
Chris@82 783 }
Chris@82 784 T7p = FNMS(KP414213562, T7o, T7l);
Chris@82 785 T7V = FNMS(KP414213562, T7L, T7M);
Chris@82 786 T7B = FMA(KP414213562, T7l, T7o);
Chris@82 787 T7N = FMA(KP414213562, T7M, T7L);
Chris@82 788 }
Chris@82 789 {
Chris@82 790 E T7b, T7q, T7D, T7E;
Chris@82 791 T7b = FMA(KP707106781, T7a, T73);
Chris@82 792 T7q = T7i + T7p;
Chris@82 793 Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T7q, T7b));
Chris@82 794 Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T7q, T7b)));
Chris@82 795 T7D = FMA(KP707106781, T7w, T7t);
Chris@82 796 T7E = T7A + T7B;
Chris@82 797 Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T7E, T7D));
Chris@82 798 Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T7E, T7D));
Chris@82 799 }
Chris@82 800 {
Chris@82 801 E T7x, T7y, T7z, T7C;
Chris@82 802 T7x = FNMS(KP707106781, T7w, T7t);
Chris@82 803 T7y = T7p - T7i;
Chris@82 804 Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T7y, T7x));
Chris@82 805 Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T7y, T7x));
Chris@82 806 T7z = FNMS(KP707106781, T7a, T73);
Chris@82 807 T7C = T7A - T7B;
Chris@82 808 Ip[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T7C, T7z));
Chris@82 809 Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T7C, T7z)));
Chris@82 810 }
Chris@82 811 {
Chris@82 812 E T7H, T7O, T7X, T7Y;
Chris@82 813 T7H = FNMS(KP707106781, T7G, T7F);
Chris@82 814 T7O = T7K - T7N;
Chris@82 815 Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7O, T7H));
Chris@82 816 Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7O, T7H)));
Chris@82 817 T7X = FNMS(KP707106781, T7Q, T7P);
Chris@82 818 T7Y = T7U + T7V;
Chris@82 819 Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7Y, T7X));
Chris@82 820 Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7Y, T7X));
Chris@82 821 }
Chris@82 822 {
Chris@82 823 E T7R, T7S, T7T, T7W;
Chris@82 824 T7R = FMA(KP707106781, T7Q, T7P);
Chris@82 825 T7S = T7K + T7N;
Chris@82 826 Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7S, T7R));
Chris@82 827 Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7S, T7R));
Chris@82 828 T7T = FMA(KP707106781, T7G, T7F);
Chris@82 829 T7W = T7U - T7V;
Chris@82 830 Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7W, T7T));
Chris@82 831 Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7W, T7T)));
Chris@82 832 }
Chris@82 833 }
Chris@82 834 {
Chris@82 835 E T89, Tat, T9l, Ta7, T99, Taj, T9v, T9H, T8o, T9w, T9c, T9m, Ta3, Tay, Tae;
Chris@82 836 E Tao, T8I, T9A, T9g, T9q, T9O, Tau, Taa, Tak, T9W, Taz, Taf, Tar, T91, T9B;
Chris@82 837 E T9h, T9t;
Chris@82 838 {
Chris@82 839 E T81, Ta5, T88, Ta6, T84, T87;
Chris@82 840 T81 = T7Z - T80;
Chris@82 841 Ta5 = T93 - T94;
Chris@82 842 T84 = T82 - T83;
Chris@82 843 T87 = T85 + T86;
Chris@82 844 T88 = T84 + T87;
Chris@82 845 Ta6 = T84 - T87;
Chris@82 846 T89 = FMA(KP707106781, T88, T81);
Chris@82 847 Tat = FNMS(KP707106781, Ta6, Ta5);
Chris@82 848 T9l = FNMS(KP707106781, T88, T81);
Chris@82 849 Ta7 = FMA(KP707106781, Ta6, Ta5);
Chris@82 850 }
Chris@82 851 {
Chris@82 852 E T95, T9F, T98, T9G, T96, T97;
Chris@82 853 T95 = T93 + T94;
Chris@82 854 T9F = T80 + T7Z;
Chris@82 855 T96 = T83 + T82;
Chris@82 856 T97 = T85 - T86;
Chris@82 857 T98 = T96 + T97;
Chris@82 858 T9G = T97 - T96;
Chris@82 859 T99 = FMA(KP707106781, T98, T95);
Chris@82 860 Taj = FNMS(KP707106781, T9G, T9F);
Chris@82 861 T9v = FNMS(KP707106781, T98, T95);
Chris@82 862 T9H = FMA(KP707106781, T9G, T9F);
Chris@82 863 }
Chris@82 864 {
Chris@82 865 E T8g, T9a, T8n, T9b;
Chris@82 866 {
Chris@82 867 E T8c, T8f, T8j, T8m;
Chris@82 868 T8c = T8a - T8b;
Chris@82 869 T8f = T8d + T8e;
Chris@82 870 T8g = FNMS(KP414213562, T8f, T8c);
Chris@82 871 T9a = FMA(KP414213562, T8c, T8f);
Chris@82 872 T8j = T8h - T8i;
Chris@82 873 T8m = T8k + T8l;
Chris@82 874 T8n = FMA(KP414213562, T8m, T8j);
Chris@82 875 T9b = FNMS(KP414213562, T8j, T8m);
Chris@82 876 }
Chris@82 877 T8o = T8g + T8n;
Chris@82 878 T9w = T8g - T8n;
Chris@82 879 T9c = T9a + T9b;
Chris@82 880 T9m = T9b - T9a;
Chris@82 881 }
Chris@82 882 {
Chris@82 883 E T9Z, Tam, Ta2, Tan;
Chris@82 884 {
Chris@82 885 E T9X, T9Y, Ta0, Ta1;
Chris@82 886 T9X = T8r - T8q;
Chris@82 887 T9Y = T8F - T8E;
Chris@82 888 T9Z = FNMS(KP707106781, T9Y, T9X);
Chris@82 889 Tam = FMA(KP707106781, T9Y, T9X);
Chris@82 890 Ta0 = T8B - T8C;
Chris@82 891 Ta1 = T8y - T8v;
Chris@82 892 Ta2 = FNMS(KP707106781, Ta1, Ta0);
Chris@82 893 Tan = FMA(KP707106781, Ta1, Ta0);
Chris@82 894 }
Chris@82 895 Ta3 = FNMS(KP668178637, Ta2, T9Z);
Chris@82 896 Tay = FNMS(KP198912367, Tam, Tan);
Chris@82 897 Tae = FMA(KP668178637, T9Z, Ta2);
Chris@82 898 Tao = FMA(KP198912367, Tan, Tam);
Chris@82 899 }
Chris@82 900 {
Chris@82 901 E T8A, T9o, T8H, T9p;
Chris@82 902 {
Chris@82 903 E T8s, T8z, T8D, T8G;
Chris@82 904 T8s = T8q + T8r;
Chris@82 905 T8z = T8v + T8y;
Chris@82 906 T8A = FMA(KP707106781, T8z, T8s);
Chris@82 907 T9o = FNMS(KP707106781, T8z, T8s);
Chris@82 908 T8D = T8B + T8C;
Chris@82 909 T8G = T8E + T8F;
Chris@82 910 T8H = FMA(KP707106781, T8G, T8D);
Chris@82 911 T9p = FNMS(KP707106781, T8G, T8D);
Chris@82 912 }
Chris@82 913 T8I = FMA(KP198912367, T8H, T8A);
Chris@82 914 T9A = FMA(KP668178637, T9o, T9p);
Chris@82 915 T9g = FNMS(KP198912367, T8A, T8H);
Chris@82 916 T9q = FNMS(KP668178637, T9p, T9o);
Chris@82 917 }
Chris@82 918 {
Chris@82 919 E T9K, Ta9, T9N, Ta8;
Chris@82 920 {
Chris@82 921 E T9I, T9J, T9L, T9M;
Chris@82 922 T9I = T8k - T8l;
Chris@82 923 T9J = T8h + T8i;
Chris@82 924 T9K = FMA(KP414213562, T9J, T9I);
Chris@82 925 Ta9 = FNMS(KP414213562, T9I, T9J);
Chris@82 926 T9L = T8d - T8e;
Chris@82 927 T9M = T8a + T8b;
Chris@82 928 T9N = FNMS(KP414213562, T9M, T9L);
Chris@82 929 Ta8 = FMA(KP414213562, T9L, T9M);
Chris@82 930 }
Chris@82 931 T9O = T9K - T9N;
Chris@82 932 Tau = T9N + T9K;
Chris@82 933 Taa = Ta8 - Ta9;
Chris@82 934 Tak = Ta8 + Ta9;
Chris@82 935 }
Chris@82 936 {
Chris@82 937 E T9S, Tap, T9V, Taq;
Chris@82 938 {
Chris@82 939 E T9Q, T9R, T9T, T9U;
Chris@82 940 T9Q = T8K + T8J;
Chris@82 941 T9R = T8X - T8Y;
Chris@82 942 T9S = FNMS(KP707106781, T9R, T9Q);
Chris@82 943 Tap = FMA(KP707106781, T9R, T9Q);
Chris@82 944 T9T = T8V - T8U;
Chris@82 945 T9U = T8R - T8O;
Chris@82 946 T9V = FNMS(KP707106781, T9U, T9T);
Chris@82 947 Taq = FMA(KP707106781, T9U, T9T);
Chris@82 948 }
Chris@82 949 T9W = FNMS(KP668178637, T9V, T9S);
Chris@82 950 Taz = FNMS(KP198912367, Tap, Taq);
Chris@82 951 Taf = FMA(KP668178637, T9S, T9V);
Chris@82 952 Tar = FMA(KP198912367, Taq, Tap);
Chris@82 953 }
Chris@82 954 {
Chris@82 955 E T8T, T9r, T90, T9s;
Chris@82 956 {
Chris@82 957 E T8L, T8S, T8W, T8Z;
Chris@82 958 T8L = T8J - T8K;
Chris@82 959 T8S = T8O + T8R;
Chris@82 960 T8T = FMA(KP707106781, T8S, T8L);
Chris@82 961 T9r = FNMS(KP707106781, T8S, T8L);
Chris@82 962 T8W = T8U + T8V;
Chris@82 963 T8Z = T8X + T8Y;
Chris@82 964 T90 = FMA(KP707106781, T8Z, T8W);
Chris@82 965 T9s = FNMS(KP707106781, T8Z, T8W);
Chris@82 966 }
Chris@82 967 T91 = FNMS(KP198912367, T90, T8T);
Chris@82 968 T9B = FNMS(KP668178637, T9r, T9s);
Chris@82 969 T9h = FMA(KP198912367, T8T, T90);
Chris@82 970 T9t = FMA(KP668178637, T9s, T9r);
Chris@82 971 }
Chris@82 972 {
Chris@82 973 E T8p, T92, T9j, T9k;
Chris@82 974 T8p = FMA(KP923879532, T8o, T89);
Chris@82 975 T92 = T8I + T91;
Chris@82 976 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T92, T8p));
Chris@82 977 Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T92, T8p)));
Chris@82 978 T9j = FMA(KP923879532, T9c, T99);
Chris@82 979 T9k = T9g + T9h;
Chris@82 980 Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T9k, T9j));
Chris@82 981 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T9k, T9j));
Chris@82 982 }
Chris@82 983 {
Chris@82 984 E T9d, T9e, T9f, T9i;
Chris@82 985 T9d = FNMS(KP923879532, T9c, T99);
Chris@82 986 T9e = T91 - T8I;
Chris@82 987 Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T9e, T9d));
Chris@82 988 Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T9e, T9d));
Chris@82 989 T9f = FNMS(KP923879532, T8o, T89);
Chris@82 990 T9i = T9g - T9h;
Chris@82 991 Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T9i, T9f));
Chris@82 992 Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T9i, T9f)));
Chris@82 993 }
Chris@82 994 {
Chris@82 995 E T9n, T9u, T9D, T9E;
Chris@82 996 T9n = FNMS(KP923879532, T9m, T9l);
Chris@82 997 T9u = T9q + T9t;
Chris@82 998 Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T9u, T9n));
Chris@82 999 Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T9u, T9n)));
Chris@82 1000 T9D = FNMS(KP923879532, T9w, T9v);
Chris@82 1001 T9E = T9A + T9B;
Chris@82 1002 Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T9E, T9D));
Chris@82 1003 Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T9E, T9D));
Chris@82 1004 }
Chris@82 1005 {
Chris@82 1006 E T9x, T9y, T9z, T9C;
Chris@82 1007 T9x = FMA(KP923879532, T9w, T9v);
Chris@82 1008 T9y = T9t - T9q;
Chris@82 1009 Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T9y, T9x));
Chris@82 1010 Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T9y, T9x));
Chris@82 1011 T9z = FMA(KP923879532, T9m, T9l);
Chris@82 1012 T9C = T9A - T9B;
Chris@82 1013 Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T9C, T9z));
Chris@82 1014 Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T9C, T9z)));
Chris@82 1015 }
Chris@82 1016 {
Chris@82 1017 E T9P, Ta4, Tah, Tai;
Chris@82 1018 T9P = FMA(KP923879532, T9O, T9H);
Chris@82 1019 Ta4 = T9W - Ta3;
Chris@82 1020 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, Ta4, T9P));
Chris@82 1021 Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, Ta4, T9P)));
Chris@82 1022 Tah = FMA(KP923879532, Taa, Ta7);
Chris@82 1023 Tai = Tae + Taf;
Chris@82 1024 Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, Tai, Tah));
Chris@82 1025 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, Tai, Tah));
Chris@82 1026 }
Chris@82 1027 {
Chris@82 1028 E Tab, Tac, Tad, Tag;
Chris@82 1029 Tab = FNMS(KP923879532, Taa, Ta7);
Chris@82 1030 Tac = Ta3 + T9W;
Chris@82 1031 Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, Tac, Tab));
Chris@82 1032 Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, Tac, Tab));
Chris@82 1033 Tad = FNMS(KP923879532, T9O, T9H);
Chris@82 1034 Tag = Tae - Taf;
Chris@82 1035 Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, Tag, Tad));
Chris@82 1036 Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, Tag, Tad)));
Chris@82 1037 }
Chris@82 1038 {
Chris@82 1039 E Tal, Tas, TaB, TaC;
Chris@82 1040 Tal = FMA(KP923879532, Tak, Taj);
Chris@82 1041 Tas = Tao - Tar;
Chris@82 1042 Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, Tas, Tal));
Chris@82 1043 Im[0] = -(KP500000000 * (FNMS(KP980785280, Tas, Tal)));
Chris@82 1044 TaB = FMA(KP923879532, Tau, Tat);
Chris@82 1045 TaC = Tay + Taz;
Chris@82 1046 Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, TaC, TaB));
Chris@82 1047 Rm[0] = KP500000000 * (FMA(KP980785280, TaC, TaB));
Chris@82 1048 }
Chris@82 1049 {
Chris@82 1050 E Tav, Taw, Tax, TaA;
Chris@82 1051 Tav = FNMS(KP923879532, Tau, Tat);
Chris@82 1052 Taw = Tao + Tar;
Chris@82 1053 Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, Taw, Tav));
Chris@82 1054 Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Taw, Tav));
Chris@82 1055 Tax = FNMS(KP923879532, Tak, Taj);
Chris@82 1056 TaA = Tay - Taz;
Chris@82 1057 Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, TaA, Tax));
Chris@82 1058 Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, TaA, Tax)));
Chris@82 1059 }
Chris@82 1060 }
Chris@82 1061 }
Chris@82 1062 }
Chris@82 1063 }
Chris@82 1064 }
Chris@82 1065
/*
 * Twiddle instruction table for this codelet; it is handed to the planner
 * through the hc2c_desc that follows.  The table holds four TW_CEXP entries
 * whose last field is 1, 3, 9 and 27 (successive powers of three) and is
 * closed by a single TW_NEXT entry.  The generator command line recorded in
 * this file includes -twiddle-log3.
 * NOTE(review): the tw_instr field layout is declared outside this file --
 * presumably {opcode, vector length, exponent}; confirm against the
 * tw_instr declaration before relying on that reading.
 */
Chris@82 1066 static const tw_instr twinstr[] = {
Chris@82 1067 {TW_CEXP, 1, 1},
Chris@82 1068 {TW_CEXP, 1, 3},
Chris@82 1069 {TW_CEXP, 1, 9},
Chris@82 1070 {TW_CEXP, 1, 27},
Chris@82 1071 {TW_NEXT, 1, 0}
Chris@82 1072 };
Chris@82 1073
/*
 * Codelet descriptor passed to the registration call below.  Its fields
 * are, in order: 32 (the -n value given to the generator), the codelet
 * name string, the twiddle instruction table, the address of GENUS, and
 * the counts {300, 162, 252, 0}.  The first three counts equal the
 * additions / multiplications / fused multiply-adds quoted in the header
 * comment of this FMA variant.
 * NOTE(review): GENUS and the meaning of the trailing 0 are defined
 * outside this file; confirm against the hc2c_desc declaration.
 */
Chris@82 1074 static const hc2c_desc desc = { 32, "hc2cfdft2_32", twinstr, &GENUS, {300, 162, 252, 0} };
Chris@82 1075
/*
 * Registration entry point for this codelet: passes the hc2cfdft2_32 kernel
 * and the address of its descriptor to planner p via X(khc2c_register),
 * tagged with HC2C_VIA_DFT.
 * NOTE(review): X() is defined outside this file -- presumably the
 * library's symbol-prefixing macro; confirm in the included headers.
 */
Chris@82 1076 void X(codelet_hc2cfdft2_32) (planner *p) {
Chris@82 1077 X(khc2c_register) (p, hc2cfdft2_32, &desc, HC2C_VIA_DFT);
Chris@82 1078 }
Chris@82 1079 #else
Chris@82 1080
Chris@82 1081 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include rdft/scalar/hc2cf.h */
Chris@82 1082
Chris@82 1083 /*
Chris@82 1084 * This function contains 552 FP additions, 300 FP multiplications,
Chris@82 1085 * (or, 440 additions, 188 multiplications, 112 fused multiply/add),
Chris@82 1086 * 166 stack variables, 9 constants, and 128 memory accesses
Chris@82 1087 */
Chris@82 1088 #include "rdft/scalar/hc2cf.h"
Chris@82 1089
Chris@82 1090 static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 1091 {
Chris@82 1092 DK(KP277785116, +0.277785116509801112371415406974266437187468595);
Chris@82 1093 DK(KP415734806, +0.415734806151272618539394188808952878369280406);
Chris@82 1094 DK(KP097545161, +0.097545161008064133924142434238511120463845809);
Chris@82 1095 DK(KP490392640, +0.490392640201615224563091118067119518486966865);
Chris@82 1096 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1097 DK(KP191341716, +0.191341716182544885864229992015199433380672281);
Chris@82 1098 DK(KP461939766, +0.461939766255643378064091594698394143411208313);
Chris@82 1099 DK(KP353553390, +0.353553390593273762200422181052424519642417969);
Chris@82 1100 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 1101 {
Chris@82 1102 INT m;
Chris@82 1103 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@82 1104 E T1, T4, T2, T5, T7, T1b, T1d, Td, Ti, Tk, Tj, Tl, TL, TR, T2h;
Chris@82 1105 E T2O, T16, T2l, T10, T2K, Tm, Tq, T3s, T3K, T3w, T3M, T4e, T4u, T4i, T4w;
Chris@82 1106 E Ty, TE, T3h, T3j, T2q, T2u, T4l, T4n, T1v, T1B, T3E, T3G, T2B, T2F, T3Y;
Chris@82 1107 E T40, T1f, T1G, T1i, T1H, T1j, T1M, T1n, T1I, T23, T2U, T26, T2V, T27, T30;
Chris@82 1108 E T2b, T2W;
Chris@82 1109 {
Chris@82 1110 E Tw, T1A, TD, T1t, Tx, T1z, TC, T1u, TJ, T15, TQ, TY, TK, T14, TP;
Chris@82 1111 E TZ;
Chris@82 1112 {
Chris@82 1113 E T3, Tc, T6, Tb;
Chris@82 1114 T1 = W[0];
Chris@82 1115 T4 = W[1];
Chris@82 1116 T2 = W[2];
Chris@82 1117 T5 = W[3];
Chris@82 1118 T3 = T1 * T2;
Chris@82 1119 Tc = T4 * T2;
Chris@82 1120 T6 = T4 * T5;
Chris@82 1121 Tb = T1 * T5;
Chris@82 1122 T7 = T3 + T6;
Chris@82 1123 T1b = T3 - T6;
Chris@82 1124 T1d = Tb + Tc;
Chris@82 1125 Td = Tb - Tc;
Chris@82 1126 Ti = W[4];
Chris@82 1127 Tw = T1 * Ti;
Chris@82 1128 T1A = T5 * Ti;
Chris@82 1129 TD = T4 * Ti;
Chris@82 1130 T1t = T2 * Ti;
Chris@82 1131 Tk = W[5];
Chris@82 1132 Tx = T4 * Tk;
Chris@82 1133 T1z = T2 * Tk;
Chris@82 1134 TC = T1 * Tk;
Chris@82 1135 T1u = T5 * Tk;
Chris@82 1136 Tj = W[6];
Chris@82 1137 TJ = T1 * Tj;
Chris@82 1138 T15 = T5 * Tj;
Chris@82 1139 TQ = T4 * Tj;
Chris@82 1140 TY = T2 * Tj;
Chris@82 1141 Tl = W[7];
Chris@82 1142 TK = T4 * Tl;
Chris@82 1143 T14 = T2 * Tl;
Chris@82 1144 TP = T1 * Tl;
Chris@82 1145 TZ = T5 * Tl;
Chris@82 1146 }
Chris@82 1147 TL = TJ + TK;
Chris@82 1148 TR = TP - TQ;
Chris@82 1149 T2h = TJ - TK;
Chris@82 1150 T2O = T14 - T15;
Chris@82 1151 T16 = T14 + T15;
Chris@82 1152 T2l = TP + TQ;
Chris@82 1153 T10 = TY - TZ;
Chris@82 1154 T2K = TY + TZ;
Chris@82 1155 Tm = FMA(Ti, Tj, Tk * Tl);
Chris@82 1156 Tq = FNMS(Tk, Tj, Ti * Tl);
Chris@82 1157 {
Chris@82 1158 E T3q, T3r, T3u, T3v;
Chris@82 1159 T3q = T7 * Tj;
Chris@82 1160 T3r = Td * Tl;
Chris@82 1161 T3s = T3q + T3r;
Chris@82 1162 T3K = T3q - T3r;
Chris@82 1163 T3u = T7 * Tl;
Chris@82 1164 T3v = Td * Tj;
Chris@82 1165 T3w = T3u - T3v;
Chris@82 1166 T3M = T3u + T3v;
Chris@82 1167 }
Chris@82 1168 {
Chris@82 1169 E T4c, T4d, T4g, T4h;
Chris@82 1170 T4c = T1b * Tj;
Chris@82 1171 T4d = T1d * Tl;
Chris@82 1172 T4e = T4c - T4d;
Chris@82 1173 T4u = T4c + T4d;
Chris@82 1174 T4g = T1b * Tl;
Chris@82 1175 T4h = T1d * Tj;
Chris@82 1176 T4i = T4g + T4h;
Chris@82 1177 T4w = T4g - T4h;
Chris@82 1178 Ty = Tw - Tx;
Chris@82 1179 TE = TC + TD;
Chris@82 1180 T3h = FMA(Ty, Tj, TE * Tl);
Chris@82 1181 T3j = FNMS(TE, Tj, Ty * Tl);
Chris@82 1182 }
Chris@82 1183 T2q = T1t - T1u;
Chris@82 1184 T2u = T1z + T1A;
Chris@82 1185 T4l = FMA(T2q, Tj, T2u * Tl);
Chris@82 1186 T4n = FNMS(T2u, Tj, T2q * Tl);
Chris@82 1187 T1v = T1t + T1u;
Chris@82 1188 T1B = T1z - T1A;
Chris@82 1189 T3E = FMA(T1v, Tj, T1B * Tl);
Chris@82 1190 T3G = FNMS(T1B, Tj, T1v * Tl);
Chris@82 1191 T2B = Tw + Tx;
Chris@82 1192 T2F = TC - TD;
Chris@82 1193 T3Y = FMA(T2B, Tj, T2F * Tl);
Chris@82 1194 T40 = FNMS(T2F, Tj, T2B * Tl);
Chris@82 1195 {
Chris@82 1196 E T1c, T1e, T1g, T1h;
Chris@82 1197 T1c = T1b * Ti;
Chris@82 1198 T1e = T1d * Tk;
Chris@82 1199 T1f = T1c - T1e;
Chris@82 1200 T1G = T1c + T1e;
Chris@82 1201 T1g = T1b * Tk;
Chris@82 1202 T1h = T1d * Ti;
Chris@82 1203 T1i = T1g + T1h;
Chris@82 1204 T1H = T1g - T1h;
Chris@82 1205 }
Chris@82 1206 T1j = FMA(T1f, Tj, T1i * Tl);
Chris@82 1207 T1M = FNMS(T1H, Tj, T1G * Tl);
Chris@82 1208 T1n = FNMS(T1i, Tj, T1f * Tl);
Chris@82 1209 T1I = FMA(T1G, Tj, T1H * Tl);
Chris@82 1210 {
Chris@82 1211 E T21, T22, T24, T25;
Chris@82 1212 T21 = T7 * Ti;
Chris@82 1213 T22 = Td * Tk;
Chris@82 1214 T23 = T21 + T22;
Chris@82 1215 T2U = T21 - T22;
Chris@82 1216 T24 = T7 * Tk;
Chris@82 1217 T25 = Td * Ti;
Chris@82 1218 T26 = T24 - T25;
Chris@82 1219 T2V = T24 + T25;
Chris@82 1220 }
Chris@82 1221 T27 = FMA(T23, Tj, T26 * Tl);
Chris@82 1222 T30 = FNMS(T2V, Tj, T2U * Tl);
Chris@82 1223 T2b = FNMS(T26, Tj, T23 * Tl);
Chris@82 1224 T2W = FMA(T2U, Tj, T2V * Tl);
Chris@82 1225 }
Chris@82 1226 {
Chris@82 1227 E T38, T7l, T7S, T8Y, T7Z, T91, T3A, T6k, T4F, T83, T5C, T6n, T2T, T84, T4I;
Chris@82 1228 E T7m, T2g, T4M, T4P, T2z, T3T, T6m, T7O, T7V, T7j, T87, T5v, T6j, T7L, T7U;
Chris@82 1229 E T7g, T86, Tv, TW, T61, T4U, T4X, T62, T4b, T6c, T7v, T7C, T5g, T6f, T74;
Chris@82 1230 E T8G, T7s, T7B, T71, T8F, T1s, T1R, T65, T51, T54, T64, T4A, T6g, T7G, T8U;
Chris@82 1231 E T5n, T6d, T7b, T8J, T7z, T8R, T78, T8I;
Chris@82 1232 {
Chris@82 1233 E T2E, T2I, T3p, T5w, T37, T4D, T3g, T5A, T2N, T2R, T3y, T5x, T2Z, T33, T3l;
Chris@82 1234 E T5z;
Chris@82 1235 {
Chris@82 1236 E T2C, T2D, T3o, T2G, T2H, T3n;
Chris@82 1237 T2C = Ip[WS(rs, 4)];
Chris@82 1238 T2D = Im[WS(rs, 4)];
Chris@82 1239 T3o = T2C + T2D;
Chris@82 1240 T2G = Rp[WS(rs, 4)];
Chris@82 1241 T2H = Rm[WS(rs, 4)];
Chris@82 1242 T3n = T2G - T2H;
Chris@82 1243 T2E = T2C - T2D;
Chris@82 1244 T2I = T2G + T2H;
Chris@82 1245 T3p = FMA(Ti, T3n, Tk * T3o);
Chris@82 1246 T5w = FNMS(Tk, T3n, Ti * T3o);
Chris@82 1247 }
Chris@82 1248 {
Chris@82 1249 E T35, T36, T3f, T3c, T3d, T3e;
Chris@82 1250 T35 = Ip[0];
Chris@82 1251 T36 = Im[0];
Chris@82 1252 T3f = T35 + T36;
Chris@82 1253 T3c = Rm[0];
Chris@82 1254 T3d = Rp[0];
Chris@82 1255 T3e = T3c - T3d;
Chris@82 1256 T37 = T35 - T36;
Chris@82 1257 T4D = T3d + T3c;
Chris@82 1258 T3g = FNMS(T4, T3f, T1 * T3e);
Chris@82 1259 T5A = FMA(T4, T3e, T1 * T3f);
Chris@82 1260 }
Chris@82 1261 {
Chris@82 1262 E T2L, T2M, T3x, T2P, T2Q, T3t;
Chris@82 1263 T2L = Ip[WS(rs, 12)];
Chris@82 1264 T2M = Im[WS(rs, 12)];
Chris@82 1265 T3x = T2L + T2M;
Chris@82 1266 T2P = Rp[WS(rs, 12)];
Chris@82 1267 T2Q = Rm[WS(rs, 12)];
Chris@82 1268 T3t = T2P - T2Q;
Chris@82 1269 T2N = T2L - T2M;
Chris@82 1270 T2R = T2P + T2Q;
Chris@82 1271 T3y = FMA(T3s, T3t, T3w * T3x);
Chris@82 1272 T5x = FNMS(T3w, T3t, T3s * T3x);
Chris@82 1273 }
Chris@82 1274 {
Chris@82 1275 E T2X, T2Y, T3k, T31, T32, T3i;
Chris@82 1276 T2X = Ip[WS(rs, 8)];
Chris@82 1277 T2Y = Im[WS(rs, 8)];
Chris@82 1278 T3k = T2X + T2Y;
Chris@82 1279 T31 = Rp[WS(rs, 8)];
Chris@82 1280 T32 = Rm[WS(rs, 8)];
Chris@82 1281 T3i = T31 - T32;
Chris@82 1282 T2Z = T2X - T2Y;
Chris@82 1283 T33 = T31 + T32;
Chris@82 1284 T3l = FMA(T3h, T3i, T3j * T3k);
Chris@82 1285 T5z = FNMS(T3j, T3i, T3h * T3k);
Chris@82 1286 }
Chris@82 1287 {
Chris@82 1288 E T34, T7Q, T7R, T4E, T5y, T5B;
Chris@82 1289 T34 = FNMS(T30, T33, T2W * T2Z);
Chris@82 1290 T38 = T34 + T37;
Chris@82 1291 T7l = T37 - T34;
Chris@82 1292 T7Q = T3l + T3g;
Chris@82 1293 T7R = T5w - T5x;
Chris@82 1294 T7S = T7Q - T7R;
Chris@82 1295 T8Y = T7R + T7Q;
Chris@82 1296 {
Chris@82 1297 E T7X, T7Y, T3m, T3z;
Chris@82 1298 T7X = T3y - T3p;
Chris@82 1299 T7Y = T5A - T5z;
Chris@82 1300 T7Z = T7X + T7Y;
Chris@82 1301 T91 = T7Y - T7X;
Chris@82 1302 T3m = T3g - T3l;
Chris@82 1303 T3z = T3p + T3y;
Chris@82 1304 T3A = T3m - T3z;
Chris@82 1305 T6k = T3z + T3m;
Chris@82 1306 }
Chris@82 1307 T4E = FMA(T2W, T33, T30 * T2Z);
Chris@82 1308 T4F = T4D + T4E;
Chris@82 1309 T83 = T4D - T4E;
Chris@82 1310 T5y = T5w + T5x;
Chris@82 1311 T5B = T5z + T5A;
Chris@82 1312 T5C = T5y + T5B;
Chris@82 1313 T6n = T5B - T5y;
Chris@82 1314 {
Chris@82 1315 E T2J, T2S, T4G, T4H;
Chris@82 1316 T2J = FNMS(T2F, T2I, T2B * T2E);
Chris@82 1317 T2S = FNMS(T2O, T2R, T2K * T2N);
Chris@82 1318 T2T = T2J + T2S;
Chris@82 1319 T84 = T2J - T2S;
Chris@82 1320 T4G = FMA(T2B, T2I, T2F * T2E);
Chris@82 1321 T4H = FMA(T2K, T2R, T2O * T2N);
Chris@82 1322 T4I = T4G + T4H;
Chris@82 1323 T7m = T4G - T4H;
Chris@82 1324 }
Chris@82 1325 }
Chris@82 1326 }
Chris@82 1327 {
Chris@82 1328 E T20, T5p, T3D, T4K, T2y, T5t, T3R, T4O, T2f, T5q, T3I, T4L, T2p, T5s, T3O;
Chris@82 1329 E T4N;
Chris@82 1330 {
Chris@82 1331 E T1W, T3C, T1Z, T3B;
Chris@82 1332 {
Chris@82 1333 E T1U, T1V, T1X, T1Y;
Chris@82 1334 T1U = Ip[WS(rs, 2)];
Chris@82 1335 T1V = Im[WS(rs, 2)];
Chris@82 1336 T1W = T1U - T1V;
Chris@82 1337 T3C = T1U + T1V;
Chris@82 1338 T1X = Rp[WS(rs, 2)];
Chris@82 1339 T1Y = Rm[WS(rs, 2)];
Chris@82 1340 T1Z = T1X + T1Y;
Chris@82 1341 T3B = T1X - T1Y;
Chris@82 1342 }
Chris@82 1343 T20 = FNMS(T1d, T1Z, T1b * T1W);
Chris@82 1344 T5p = FNMS(T1H, T3B, T1G * T3C);
Chris@82 1345 T3D = FMA(T1G, T3B, T1H * T3C);
Chris@82 1346 T4K = FMA(T1b, T1Z, T1d * T1W);
Chris@82 1347 }
Chris@82 1348 {
Chris@82 1349 E T2t, T3Q, T2x, T3P;
Chris@82 1350 {
Chris@82 1351 E T2r, T2s, T2v, T2w;
Chris@82 1352 T2r = Ip[WS(rs, 6)];
Chris@82 1353 T2s = Im[WS(rs, 6)];
Chris@82 1354 T2t = T2r - T2s;
Chris@82 1355 T3Q = T2r + T2s;
Chris@82 1356 T2v = Rp[WS(rs, 6)];
Chris@82 1357 T2w = Rm[WS(rs, 6)];
Chris@82 1358 T2x = T2v + T2w;
Chris@82 1359 T3P = T2v - T2w;
Chris@82 1360 }
Chris@82 1361 T2y = FNMS(T2u, T2x, T2q * T2t);
Chris@82 1362 T5t = FNMS(T1i, T3P, T1f * T3Q);
Chris@82 1363 T3R = FMA(T1f, T3P, T1i * T3Q);
Chris@82 1364 T4O = FMA(T2q, T2x, T2u * T2t);
Chris@82 1365 }
Chris@82 1366 {
Chris@82 1367 E T2a, T3H, T2e, T3F;
Chris@82 1368 {
Chris@82 1369 E T28, T29, T2c, T2d;
Chris@82 1370 T28 = Ip[WS(rs, 10)];
Chris@82 1371 T29 = Im[WS(rs, 10)];
Chris@82 1372 T2a = T28 - T29;
Chris@82 1373 T3H = T28 + T29;
Chris@82 1374 T2c = Rp[WS(rs, 10)];
Chris@82 1375 T2d = Rm[WS(rs, 10)];
Chris@82 1376 T2e = T2c + T2d;
Chris@82 1377 T3F = T2c - T2d;
Chris@82 1378 }
Chris@82 1379 T2f = FNMS(T2b, T2e, T27 * T2a);
Chris@82 1380 T5q = FNMS(T3G, T3F, T3E * T3H);
Chris@82 1381 T3I = FMA(T3E, T3F, T3G * T3H);
Chris@82 1382 T4L = FMA(T27, T2e, T2b * T2a);
Chris@82 1383 }
Chris@82 1384 {
Chris@82 1385 E T2k, T3N, T2o, T3L;
Chris@82 1386 {
Chris@82 1387 E T2i, T2j, T2m, T2n;
Chris@82 1388 T2i = Ip[WS(rs, 14)];
Chris@82 1389 T2j = Im[WS(rs, 14)];
Chris@82 1390 T2k = T2i - T2j;
Chris@82 1391 T3N = T2i + T2j;
Chris@82 1392 T2m = Rp[WS(rs, 14)];
Chris@82 1393 T2n = Rm[WS(rs, 14)];
Chris@82 1394 T2o = T2m + T2n;
Chris@82 1395 T3L = T2m - T2n;
Chris@82 1396 }
Chris@82 1397 T2p = FNMS(T2l, T2o, T2h * T2k);
Chris@82 1398 T5s = FNMS(T3M, T3L, T3K * T3N);
Chris@82 1399 T3O = FMA(T3K, T3L, T3M * T3N);
Chris@82 1400 T4N = FMA(T2h, T2o, T2l * T2k);
Chris@82 1401 }
Chris@82 1402 {
Chris@82 1403 E T3J, T3S, T5r, T5u;
Chris@82 1404 T2g = T20 + T2f;
Chris@82 1405 T4M = T4K + T4L;
Chris@82 1406 T4P = T4N + T4O;
Chris@82 1407 T2z = T2p + T2y;
Chris@82 1408 T3J = T3D + T3I;
Chris@82 1409 T3S = T3O + T3R;
Chris@82 1410 T3T = T3J + T3S;
Chris@82 1411 T6m = T3S - T3J;
Chris@82 1412 {
Chris@82 1413 E T7M, T7N, T7h, T7i;
Chris@82 1414 T7M = T5s - T5t;
Chris@82 1415 T7N = T3R - T3O;
Chris@82 1416 T7O = T7M + T7N;
Chris@82 1417 T7V = T7M - T7N;
Chris@82 1418 T7h = T4N - T4O;
Chris@82 1419 T7i = T2p - T2y;
Chris@82 1420 T7j = T7h + T7i;
Chris@82 1421 T87 = T7h - T7i;
Chris@82 1422 }
Chris@82 1423 T5r = T5p + T5q;
Chris@82 1424 T5u = T5s + T5t;
Chris@82 1425 T5v = T5r + T5u;
Chris@82 1426 T6j = T5u - T5r;
Chris@82 1427 {
Chris@82 1428 E T7J, T7K, T7e, T7f;
Chris@82 1429 T7J = T3I - T3D;
Chris@82 1430 T7K = T5p - T5q;
Chris@82 1431 T7L = T7J - T7K;
Chris@82 1432 T7U = T7K + T7J;
Chris@82 1433 T7e = T20 - T2f;
Chris@82 1434 T7f = T4K - T4L;
Chris@82 1435 T7g = T7e - T7f;
Chris@82 1436 T86 = T7f + T7e;
Chris@82 1437 }
Chris@82 1438 }
Chris@82 1439 }
Chris@82 1440 {
Chris@82 1441 E Th, T5a, T3X, T4S, TV, T5e, T49, T4W, Tu, T5b, T42, T4T, TI, T5d, T46;
Chris@82 1442 E T4V;
Chris@82 1443 {
Chris@82 1444 E Ta, T3W, Tg, T3V;
Chris@82 1445 {
Chris@82 1446 E T8, T9, Te, Tf;
Chris@82 1447 T8 = Ip[WS(rs, 1)];
Chris@82 1448 T9 = Im[WS(rs, 1)];
Chris@82 1449 Ta = T8 - T9;
Chris@82 1450 T3W = T8 + T9;
Chris@82 1451 Te = Rp[WS(rs, 1)];
Chris@82 1452 Tf = Rm[WS(rs, 1)];
Chris@82 1453 Tg = Te + Tf;
Chris@82 1454 T3V = Te - Tf;
Chris@82 1455 }
Chris@82 1456 Th = FNMS(Td, Tg, T7 * Ta);
Chris@82 1457 T5a = FNMS(T5, T3V, T2 * T3W);
Chris@82 1458 T3X = FMA(T2, T3V, T5 * T3W);
Chris@82 1459 T4S = FMA(T7, Tg, Td * Ta);
Chris@82 1460 }
Chris@82 1461 {
Chris@82 1462 E TO, T48, TU, T47;
Chris@82 1463 {
Chris@82 1464 E TM, TN, TS, TT;
Chris@82 1465 TM = Ip[WS(rs, 13)];
Chris@82 1466 TN = Im[WS(rs, 13)];
Chris@82 1467 TO = TM - TN;
Chris@82 1468 T48 = TM + TN;
Chris@82 1469 TS = Rp[WS(rs, 13)];
Chris@82 1470 TT = Rm[WS(rs, 13)];
Chris@82 1471 TU = TS + TT;
Chris@82 1472 T47 = TS - TT;
Chris@82 1473 }
Chris@82 1474 TV = FNMS(TR, TU, TL * TO);
Chris@82 1475 T5e = FNMS(Tl, T47, Tj * T48);
Chris@82 1476 T49 = FMA(Tj, T47, Tl * T48);
Chris@82 1477 T4W = FMA(TL, TU, TR * TO);
Chris@82 1478 }
Chris@82 1479 {
Chris@82 1480 E Tp, T41, Tt, T3Z;
Chris@82 1481 {
Chris@82 1482 E Tn, To, Tr, Ts;
Chris@82 1483 Tn = Ip[WS(rs, 9)];
Chris@82 1484 To = Im[WS(rs, 9)];
Chris@82 1485 Tp = Tn - To;
Chris@82 1486 T41 = Tn + To;
Chris@82 1487 Tr = Rp[WS(rs, 9)];
Chris@82 1488 Ts = Rm[WS(rs, 9)];
Chris@82 1489 Tt = Tr + Ts;
Chris@82 1490 T3Z = Tr - Ts;
Chris@82 1491 }
Chris@82 1492 Tu = FNMS(Tq, Tt, Tm * Tp);
Chris@82 1493 T5b = FNMS(T40, T3Z, T3Y * T41);
Chris@82 1494 T42 = FMA(T3Y, T3Z, T40 * T41);
Chris@82 1495 T4T = FMA(Tm, Tt, Tq * Tp);
Chris@82 1496 }
Chris@82 1497 {
Chris@82 1498 E TB, T45, TH, T44;
Chris@82 1499 {
Chris@82 1500 E Tz, TA, TF, TG;
Chris@82 1501 Tz = Ip[WS(rs, 5)];
Chris@82 1502 TA = Im[WS(rs, 5)];
Chris@82 1503 TB = Tz - TA;
Chris@82 1504 T45 = Tz + TA;
Chris@82 1505 TF = Rp[WS(rs, 5)];
Chris@82 1506 TG = Rm[WS(rs, 5)];
Chris@82 1507 TH = TF + TG;
Chris@82 1508 T44 = TF - TG;
Chris@82 1509 }
Chris@82 1510 TI = FNMS(TE, TH, Ty * TB);
Chris@82 1511 T5d = FNMS(T2V, T44, T2U * T45);
Chris@82 1512 T46 = FMA(T2U, T44, T2V * T45);
Chris@82 1513 T4V = FMA(Ty, TH, TE * TB);
Chris@82 1514 }
Chris@82 1515 Tv = Th + Tu;
Chris@82 1516 TW = TI + TV;
Chris@82 1517 T61 = Tv - TW;
Chris@82 1518 T4U = T4S + T4T;
Chris@82 1519 T4X = T4V + T4W;
Chris@82 1520 T62 = T4U - T4X;
Chris@82 1521 {
Chris@82 1522 E T43, T4a, T7t, T7u;
Chris@82 1523 T43 = T3X + T42;
Chris@82 1524 T4a = T46 + T49;
Chris@82 1525 T4b = T43 + T4a;
Chris@82 1526 T6c = T4a - T43;
Chris@82 1527 T7t = T5e - T5d;
Chris@82 1528 T7u = T46 - T49;
Chris@82 1529 T7v = T7t + T7u;
Chris@82 1530 T7C = T7t - T7u;
Chris@82 1531 }
Chris@82 1532 {
Chris@82 1533 E T5c, T5f, T72, T73;
Chris@82 1534 T5c = T5a + T5b;
Chris@82 1535 T5f = T5d + T5e;
Chris@82 1536 T5g = T5c + T5f;
Chris@82 1537 T6f = T5f - T5c;
Chris@82 1538 T72 = T4S - T4T;
Chris@82 1539 T73 = TI - TV;
Chris@82 1540 T74 = T72 + T73;
Chris@82 1541 T8G = T72 - T73;
Chris@82 1542 }
Chris@82 1543 {
Chris@82 1544 E T7q, T7r, T6Z, T70;
Chris@82 1545 T7q = T42 - T3X;
Chris@82 1546 T7r = T5a - T5b;
Chris@82 1547 T7s = T7q - T7r;
Chris@82 1548 T7B = T7r + T7q;
Chris@82 1549 T6Z = Th - Tu;
Chris@82 1550 T70 = T4V - T4W;
Chris@82 1551 T71 = T6Z - T70;
Chris@82 1552 T8F = T6Z + T70;
Chris@82 1553 }
Chris@82 1554 }
Chris@82 1555 {
Chris@82 1556 E T1a, T5h, T4k, T4Z, T1Q, T5l, T4y, T53, T1r, T5i, T4p, T50, T1F, T5k, T4t;
Chris@82 1557 E T52;
Chris@82 1558 {
Chris@82 1559 E T13, T4j, T19, T4f;
Chris@82 1560 {
Chris@82 1561 E T11, T12, T17, T18;
Chris@82 1562 T11 = Ip[WS(rs, 15)];
Chris@82 1563 T12 = Im[WS(rs, 15)];
Chris@82 1564 T13 = T11 - T12;
Chris@82 1565 T4j = T11 + T12;
Chris@82 1566 T17 = Rp[WS(rs, 15)];
Chris@82 1567 T18 = Rm[WS(rs, 15)];
Chris@82 1568 T19 = T17 + T18;
Chris@82 1569 T4f = T17 - T18;
Chris@82 1570 }
Chris@82 1571 T1a = FNMS(T16, T19, T10 * T13);
Chris@82 1572 T5h = FNMS(T4i, T4f, T4e * T4j);
Chris@82 1573 T4k = FMA(T4e, T4f, T4i * T4j);
Chris@82 1574 T4Z = FMA(T10, T19, T16 * T13);
Chris@82 1575 }
Chris@82 1576 {
Chris@82 1577 E T1L, T4x, T1P, T4v;
Chris@82 1578 {
Chris@82 1579 E T1J, T1K, T1N, T1O;
Chris@82 1580 T1J = Ip[WS(rs, 11)];
Chris@82 1581 T1K = Im[WS(rs, 11)];
Chris@82 1582 T1L = T1J - T1K;
Chris@82 1583 T4x = T1J + T1K;
Chris@82 1584 T1N = Rp[WS(rs, 11)];
Chris@82 1585 T1O = Rm[WS(rs, 11)];
Chris@82 1586 T1P = T1N + T1O;
Chris@82 1587 T4v = T1N - T1O;
Chris@82 1588 }
Chris@82 1589 T1Q = FNMS(T1M, T1P, T1I * T1L);
Chris@82 1590 T5l = FNMS(T4w, T4v, T4u * T4x);
Chris@82 1591 T4y = FMA(T4u, T4v, T4w * T4x);
Chris@82 1592 T53 = FMA(T1I, T1P, T1M * T1L);
Chris@82 1593 }
Chris@82 1594 {
Chris@82 1595 E T1m, T4o, T1q, T4m;
Chris@82 1596 {
Chris@82 1597 E T1k, T1l, T1o, T1p;
Chris@82 1598 T1k = Ip[WS(rs, 7)];
Chris@82 1599 T1l = Im[WS(rs, 7)];
Chris@82 1600 T1m = T1k - T1l;
Chris@82 1601 T4o = T1k + T1l;
Chris@82 1602 T1o = Rp[WS(rs, 7)];
Chris@82 1603 T1p = Rm[WS(rs, 7)];
Chris@82 1604 T1q = T1o + T1p;
Chris@82 1605 T4m = T1o - T1p;
Chris@82 1606 }
Chris@82 1607 T1r = FNMS(T1n, T1q, T1j * T1m);
Chris@82 1608 T5i = FNMS(T4n, T4m, T4l * T4o);
Chris@82 1609 T4p = FMA(T4l, T4m, T4n * T4o);
Chris@82 1610 T50 = FMA(T1j, T1q, T1n * T1m);
Chris@82 1611 }
Chris@82 1612 {
Chris@82 1613 E T1y, T4s, T1E, T4r;
Chris@82 1614 {
Chris@82 1615 E T1w, T1x, T1C, T1D;
Chris@82 1616 T1w = Ip[WS(rs, 3)];
Chris@82 1617 T1x = Im[WS(rs, 3)];
Chris@82 1618 T1y = T1w - T1x;
Chris@82 1619 T4s = T1w + T1x;
Chris@82 1620 T1C = Rp[WS(rs, 3)];
Chris@82 1621 T1D = Rm[WS(rs, 3)];
Chris@82 1622 T1E = T1C + T1D;
Chris@82 1623 T4r = T1C - T1D;
Chris@82 1624 }
Chris@82 1625 T1F = FNMS(T1B, T1E, T1v * T1y);
Chris@82 1626 T5k = FNMS(T26, T4r, T23 * T4s);
Chris@82 1627 T4t = FMA(T23, T4r, T26 * T4s);
Chris@82 1628 T52 = FMA(T1v, T1E, T1B * T1y);
Chris@82 1629 }
Chris@82 1630 T1s = T1a + T1r;
Chris@82 1631 T1R = T1F + T1Q;
Chris@82 1632 T65 = T1s - T1R;
Chris@82 1633 T51 = T4Z + T50;
Chris@82 1634 T54 = T52 + T53;
Chris@82 1635 T64 = T51 - T54;
Chris@82 1636 {
Chris@82 1637 E T4q, T4z, T7E, T7F;
Chris@82 1638 T4q = T4k + T4p;
Chris@82 1639 T4z = T4t + T4y;
Chris@82 1640 T4A = T4q + T4z;
Chris@82 1641 T6g = T4z - T4q;
Chris@82 1642 T7E = T5h - T5i;
Chris@82 1643 T7F = T4y - T4t;
Chris@82 1644 T7G = T7E + T7F;
Chris@82 1645 T8U = T7E - T7F;
Chris@82 1646 }
Chris@82 1647 {
Chris@82 1648 E T5j, T5m, T79, T7a;
Chris@82 1649 T5j = T5h + T5i;
Chris@82 1650 T5m = T5k + T5l;
Chris@82 1651 T5n = T5j + T5m;
Chris@82 1652 T6d = T5j - T5m;
Chris@82 1653 T79 = T4Z - T50;
Chris@82 1654 T7a = T1F - T1Q;
Chris@82 1655 T7b = T79 + T7a;
Chris@82 1656 T8J = T79 - T7a;
Chris@82 1657 }
Chris@82 1658 {
Chris@82 1659 E T7x, T7y, T76, T77;
Chris@82 1660 T7x = T4p - T4k;
Chris@82 1661 T7y = T5k - T5l;
Chris@82 1662 T7z = T7x - T7y;
Chris@82 1663 T8R = T7x + T7y;
Chris@82 1664 T76 = T1a - T1r;
Chris@82 1665 T77 = T52 - T53;
Chris@82 1666 T78 = T76 - T77;
Chris@82 1667 T8I = T76 + T77;
Chris@82 1668 }
Chris@82 1669 }
Chris@82 1670 {
Chris@82 1671 E T1T, T5S, T5M, T5W, T5P, T5X, T3a, T5I, T4C, T58, T56, T5H, T5E, T5G, T4R;
Chris@82 1672 E T5R;
Chris@82 1673 {
Chris@82 1674 E TX, T1S, T5K, T5L;
Chris@82 1675 TX = Tv + TW;
Chris@82 1676 T1S = T1s + T1R;
Chris@82 1677 T1T = TX + T1S;
Chris@82 1678 T5S = TX - T1S;
Chris@82 1679 T5K = T5n - T5g;
Chris@82 1680 T5L = T4b - T4A;
Chris@82 1681 T5M = T5K + T5L;
Chris@82 1682 T5W = T5K - T5L;
Chris@82 1683 }
Chris@82 1684 {
Chris@82 1685 E T5N, T5O, T2A, T39;
Chris@82 1686 T5N = T3T + T3A;
Chris@82 1687 T5O = T5C - T5v;
Chris@82 1688 T5P = T5N - T5O;
Chris@82 1689 T5X = T5N + T5O;
Chris@82 1690 T2A = T2g + T2z;
Chris@82 1691 T39 = T2T + T38;
Chris@82 1692 T3a = T2A + T39;
Chris@82 1693 T5I = T39 - T2A;
Chris@82 1694 }
Chris@82 1695 {
Chris@82 1696 E T3U, T4B, T4Y, T55;
Chris@82 1697 T3U = T3A - T3T;
Chris@82 1698 T4B = T4b + T4A;
Chris@82 1699 T4C = T3U - T4B;
Chris@82 1700 T58 = T4B + T3U;
Chris@82 1701 T4Y = T4U + T4X;
Chris@82 1702 T55 = T51 + T54;
Chris@82 1703 T56 = T4Y + T55;
Chris@82 1704 T5H = T55 - T4Y;
Chris@82 1705 }
Chris@82 1706 {
Chris@82 1707 E T5o, T5D, T4J, T4Q;
Chris@82 1708 T5o = T5g + T5n;
Chris@82 1709 T5D = T5v + T5C;
Chris@82 1710 T5E = T5o - T5D;
Chris@82 1711 T5G = T5o + T5D;
Chris@82 1712 T4J = T4F + T4I;
Chris@82 1713 T4Q = T4M + T4P;
Chris@82 1714 T4R = T4J + T4Q;
Chris@82 1715 T5R = T4J - T4Q;
Chris@82 1716 }
Chris@82 1717 {
Chris@82 1718 E T3b, T5F, T57, T59;
Chris@82 1719 T3b = T1T + T3a;
Chris@82 1720 Ip[0] = KP500000000 * (T3b + T4C);
Chris@82 1721 Im[WS(rs, 15)] = KP500000000 * (T4C - T3b);
Chris@82 1722 T5F = T4R + T56;
Chris@82 1723 Rm[WS(rs, 15)] = KP500000000 * (T5F - T5G);
Chris@82 1724 Rp[0] = KP500000000 * (T5F + T5G);
Chris@82 1725 T57 = T4R - T56;
Chris@82 1726 Rm[WS(rs, 7)] = KP500000000 * (T57 - T58);
Chris@82 1727 Rp[WS(rs, 8)] = KP500000000 * (T57 + T58);
Chris@82 1728 T59 = T3a - T1T;
Chris@82 1729 Ip[WS(rs, 8)] = KP500000000 * (T59 + T5E);
Chris@82 1730 Im[WS(rs, 7)] = KP500000000 * (T5E - T59);
Chris@82 1731 }
Chris@82 1732 {
Chris@82 1733 E T5J, T5Q, T5Z, T60;
Chris@82 1734 T5J = KP500000000 * (T5H + T5I);
Chris@82 1735 T5Q = KP353553390 * (T5M + T5P);
Chris@82 1736 Ip[WS(rs, 4)] = T5J + T5Q;
Chris@82 1737 Im[WS(rs, 11)] = T5Q - T5J;
Chris@82 1738 T5Z = KP500000000 * (T5R + T5S);
Chris@82 1739 T60 = KP353553390 * (T5W + T5X);
Chris@82 1740 Rm[WS(rs, 11)] = T5Z - T60;
Chris@82 1741 Rp[WS(rs, 4)] = T5Z + T60;
Chris@82 1742 }
Chris@82 1743 {
Chris@82 1744 E T5T, T5U, T5V, T5Y;
Chris@82 1745 T5T = KP500000000 * (T5R - T5S);
Chris@82 1746 T5U = KP353553390 * (T5P - T5M);
Chris@82 1747 Rm[WS(rs, 3)] = T5T - T5U;
Chris@82 1748 Rp[WS(rs, 12)] = T5T + T5U;
Chris@82 1749 T5V = KP500000000 * (T5I - T5H);
Chris@82 1750 T5Y = KP353553390 * (T5W - T5X);
Chris@82 1751 Ip[WS(rs, 12)] = T5V + T5Y;
Chris@82 1752 Im[WS(rs, 3)] = T5Y - T5V;
Chris@82 1753 }
Chris@82 1754 }
Chris@82 1755 {
Chris@82 1756 E T67, T6Q, T6K, T6U, T6N, T6V, T6a, T6G, T6i, T6A, T6t, T6P, T6w, T6F, T6p;
Chris@82 1757 E T6B;
Chris@82 1758 {
Chris@82 1759 E T63, T66, T6I, T6J;
Chris@82 1760 T63 = T61 - T62;
Chris@82 1761 T66 = T64 + T65;
Chris@82 1762 T67 = KP353553390 * (T63 + T66);
Chris@82 1763 T6Q = KP353553390 * (T63 - T66);
Chris@82 1764 T6I = T6d - T6c;
Chris@82 1765 T6J = T6g - T6f;
Chris@82 1766 T6K = FMA(KP461939766, T6I, KP191341716 * T6J);
Chris@82 1767 T6U = FNMS(KP461939766, T6J, KP191341716 * T6I);
Chris@82 1768 }
Chris@82 1769 {
Chris@82 1770 E T6L, T6M, T68, T69;
Chris@82 1771 T6L = T6k - T6j;
Chris@82 1772 T6M = T6n - T6m;
Chris@82 1773 T6N = FNMS(KP461939766, T6M, KP191341716 * T6L);
Chris@82 1774 T6V = FMA(KP461939766, T6L, KP191341716 * T6M);
Chris@82 1775 T68 = T4P - T4M;
Chris@82 1776 T69 = T38 - T2T;
Chris@82 1777 T6a = KP500000000 * (T68 + T69);
Chris@82 1778 T6G = KP500000000 * (T69 - T68);
Chris@82 1779 }
Chris@82 1780 {
Chris@82 1781 E T6e, T6h, T6r, T6s;
Chris@82 1782 T6e = T6c + T6d;
Chris@82 1783 T6h = T6f + T6g;
Chris@82 1784 T6i = FMA(KP191341716, T6e, KP461939766 * T6h);
Chris@82 1785 T6A = FNMS(KP191341716, T6h, KP461939766 * T6e);
Chris@82 1786 T6r = T4F - T4I;
Chris@82 1787 T6s = T2g - T2z;
Chris@82 1788 T6t = KP500000000 * (T6r + T6s);
Chris@82 1789 T6P = KP500000000 * (T6r - T6s);
Chris@82 1790 }
Chris@82 1791 {
Chris@82 1792 E T6u, T6v, T6l, T6o;
Chris@82 1793 T6u = T62 + T61;
Chris@82 1794 T6v = T64 - T65;
Chris@82 1795 T6w = KP353553390 * (T6u + T6v);
Chris@82 1796 T6F = KP353553390 * (T6v - T6u);
Chris@82 1797 T6l = T6j + T6k;
Chris@82 1798 T6o = T6m + T6n;
Chris@82 1799 T6p = FNMS(KP191341716, T6o, KP461939766 * T6l);
Chris@82 1800 T6B = FMA(KP191341716, T6l, KP461939766 * T6o);
Chris@82 1801 }
Chris@82 1802 {
Chris@82 1803 E T6b, T6q, T6D, T6E;
Chris@82 1804 T6b = T67 + T6a;
Chris@82 1805 T6q = T6i + T6p;
Chris@82 1806 Ip[WS(rs, 2)] = T6b + T6q;
Chris@82 1807 Im[WS(rs, 13)] = T6q - T6b;
Chris@82 1808 T6D = T6t + T6w;
Chris@82 1809 T6E = T6A + T6B;
Chris@82 1810 Rm[WS(rs, 13)] = T6D - T6E;
Chris@82 1811 Rp[WS(rs, 2)] = T6D + T6E;
Chris@82 1812 }
Chris@82 1813 {
Chris@82 1814 E T6x, T6y, T6z, T6C;
Chris@82 1815 T6x = T6t - T6w;
Chris@82 1816 T6y = T6p - T6i;
Chris@82 1817 Rm[WS(rs, 5)] = T6x - T6y;
Chris@82 1818 Rp[WS(rs, 10)] = T6x + T6y;
Chris@82 1819 T6z = T6a - T67;
Chris@82 1820 T6C = T6A - T6B;
Chris@82 1821 Ip[WS(rs, 10)] = T6z + T6C;
Chris@82 1822 Im[WS(rs, 5)] = T6C - T6z;
Chris@82 1823 }
Chris@82 1824 {
Chris@82 1825 E T6H, T6O, T6X, T6Y;
Chris@82 1826 T6H = T6F + T6G;
Chris@82 1827 T6O = T6K + T6N;
Chris@82 1828 Ip[WS(rs, 6)] = T6H + T6O;
Chris@82 1829 Im[WS(rs, 9)] = T6O - T6H;
Chris@82 1830 T6X = T6P + T6Q;
Chris@82 1831 T6Y = T6U + T6V;
Chris@82 1832 Rm[WS(rs, 9)] = T6X - T6Y;
Chris@82 1833 Rp[WS(rs, 6)] = T6X + T6Y;
Chris@82 1834 }
Chris@82 1835 {
Chris@82 1836 E T6R, T6S, T6T, T6W;
Chris@82 1837 T6R = T6P - T6Q;
Chris@82 1838 T6S = T6N - T6K;
Chris@82 1839 Rm[WS(rs, 1)] = T6R - T6S;
Chris@82 1840 Rp[WS(rs, 14)] = T6R + T6S;
Chris@82 1841 T6T = T6G - T6F;
Chris@82 1842 T6W = T6U - T6V;
Chris@82 1843 Ip[WS(rs, 14)] = T6T + T6W;
Chris@82 1844 Im[WS(rs, 1)] = T6W - T6T;
Chris@82 1845 }
Chris@82 1846 }
Chris@82 1847 {
Chris@82 1848 E T7d, T8w, T7o, T8m, T8c, T8l, T89, T8v, T81, T8B, T8h, T8t, T7I, T8A, T8g;
Chris@82 1849 E T8q;
Chris@82 1850 {
Chris@82 1851 E T75, T7c, T85, T88;
Chris@82 1852 T75 = FNMS(KP191341716, T74, KP461939766 * T71);
Chris@82 1853 T7c = FMA(KP461939766, T78, KP191341716 * T7b);
Chris@82 1854 T7d = T75 + T7c;
Chris@82 1855 T8w = T75 - T7c;
Chris@82 1856 {
Chris@82 1857 E T7k, T7n, T8a, T8b;
Chris@82 1858 T7k = KP353553390 * (T7g + T7j);
Chris@82 1859 T7n = KP500000000 * (T7l - T7m);
Chris@82 1860 T7o = T7k + T7n;
Chris@82 1861 T8m = T7n - T7k;
Chris@82 1862 T8a = FMA(KP191341716, T71, KP461939766 * T74);
Chris@82 1863 T8b = FNMS(KP191341716, T78, KP461939766 * T7b);
Chris@82 1864 T8c = T8a + T8b;
Chris@82 1865 T8l = T8b - T8a;
Chris@82 1866 }
Chris@82 1867 T85 = KP500000000 * (T83 + T84);
Chris@82 1868 T88 = KP353553390 * (T86 + T87);
Chris@82 1869 T89 = T85 + T88;
Chris@82 1870 T8v = T85 - T88;
Chris@82 1871 {
Chris@82 1872 E T7T, T8r, T80, T8s, T7P, T7W;
Chris@82 1873 T7P = KP707106781 * (T7L + T7O);
Chris@82 1874 T7T = T7P + T7S;
Chris@82 1875 T8r = T7S - T7P;
Chris@82 1876 T7W = KP707106781 * (T7U + T7V);
Chris@82 1877 T80 = T7W + T7Z;
Chris@82 1878 T8s = T7Z - T7W;
Chris@82 1879 T81 = FNMS(KP097545161, T80, KP490392640 * T7T);
Chris@82 1880 T8B = FMA(KP415734806, T8r, KP277785116 * T8s);
Chris@82 1881 T8h = FMA(KP097545161, T7T, KP490392640 * T80);
Chris@82 1882 T8t = FNMS(KP415734806, T8s, KP277785116 * T8r);
Chris@82 1883 }
Chris@82 1884 {
Chris@82 1885 E T7A, T8o, T7H, T8p, T7w, T7D;
Chris@82 1886 T7w = KP707106781 * (T7s + T7v);
Chris@82 1887 T7A = T7w + T7z;
Chris@82 1888 T8o = T7z - T7w;
Chris@82 1889 T7D = KP707106781 * (T7B + T7C);
Chris@82 1890 T7H = T7D + T7G;
Chris@82 1891 T8p = T7G - T7D;
Chris@82 1892 T7I = FMA(KP490392640, T7A, KP097545161 * T7H);
Chris@82 1893 T8A = FNMS(KP415734806, T8o, KP277785116 * T8p);
Chris@82 1894 T8g = FNMS(KP097545161, T7A, KP490392640 * T7H);
Chris@82 1895 T8q = FMA(KP277785116, T8o, KP415734806 * T8p);
Chris@82 1896 }
Chris@82 1897 }
Chris@82 1898 {
Chris@82 1899 E T7p, T82, T8j, T8k;
Chris@82 1900 T7p = T7d + T7o;
Chris@82 1901 T82 = T7I + T81;
Chris@82 1902 Ip[WS(rs, 1)] = T7p + T82;
Chris@82 1903 Im[WS(rs, 14)] = T82 - T7p;
Chris@82 1904 T8j = T89 + T8c;
Chris@82 1905 T8k = T8g + T8h;
Chris@82 1906 Rm[WS(rs, 14)] = T8j - T8k;
Chris@82 1907 Rp[WS(rs, 1)] = T8j + T8k;
Chris@82 1908 }
Chris@82 1909 {
Chris@82 1910 E T8d, T8e, T8f, T8i;
Chris@82 1911 T8d = T89 - T8c;
Chris@82 1912 T8e = T81 - T7I;
Chris@82 1913 Rm[WS(rs, 6)] = T8d - T8e;
Chris@82 1914 Rp[WS(rs, 9)] = T8d + T8e;
Chris@82 1915 T8f = T7o - T7d;
Chris@82 1916 T8i = T8g - T8h;
Chris@82 1917 Ip[WS(rs, 9)] = T8f + T8i;
Chris@82 1918 Im[WS(rs, 6)] = T8i - T8f;
Chris@82 1919 }
Chris@82 1920 {
Chris@82 1921 E T8n, T8u, T8D, T8E;
Chris@82 1922 T8n = T8l + T8m;
Chris@82 1923 T8u = T8q + T8t;
Chris@82 1924 Ip[WS(rs, 5)] = T8n + T8u;
Chris@82 1925 Im[WS(rs, 10)] = T8u - T8n;
Chris@82 1926 T8D = T8v + T8w;
Chris@82 1927 T8E = T8A + T8B;
Chris@82 1928 Rm[WS(rs, 10)] = T8D - T8E;
Chris@82 1929 Rp[WS(rs, 5)] = T8D + T8E;
Chris@82 1930 }
Chris@82 1931 {
Chris@82 1932 E T8x, T8y, T8z, T8C;
Chris@82 1933 T8x = T8v - T8w;
Chris@82 1934 T8y = T8t - T8q;
Chris@82 1935 Rm[WS(rs, 2)] = T8x - T8y;
Chris@82 1936 Rp[WS(rs, 13)] = T8x + T8y;
Chris@82 1937 T8z = T8m - T8l;
Chris@82 1938 T8C = T8A - T8B;
Chris@82 1939 Ip[WS(rs, 13)] = T8z + T8C;
Chris@82 1940 Im[WS(rs, 2)] = T8C - T8z;
Chris@82 1941 }
Chris@82 1942 }
Chris@82 1943 {
Chris@82 1944 E T8L, T9u, T8O, T9k, T9a, T9j, T97, T9t, T93, T9z, T9f, T9r, T8W, T9y, T9e;
Chris@82 1945 E T9o;
Chris@82 1946 {
Chris@82 1947 E T8H, T8K, T95, T96;
Chris@82 1948 T8H = FNMS(KP461939766, T8G, KP191341716 * T8F);
Chris@82 1949 T8K = FMA(KP191341716, T8I, KP461939766 * T8J);
Chris@82 1950 T8L = T8H + T8K;
Chris@82 1951 T9u = T8H - T8K;
Chris@82 1952 {
Chris@82 1953 E T8M, T8N, T98, T99;
Chris@82 1954 T8M = KP353553390 * (T87 - T86);
Chris@82 1955 T8N = KP500000000 * (T7m + T7l);
Chris@82 1956 T8O = T8M + T8N;
Chris@82 1957 T9k = T8N - T8M;
Chris@82 1958 T98 = FMA(KP461939766, T8F, KP191341716 * T8G);
Chris@82 1959 T99 = FNMS(KP461939766, T8I, KP191341716 * T8J);
Chris@82 1960 T9a = T98 + T99;
Chris@82 1961 T9j = T99 - T98;
Chris@82 1962 }
Chris@82 1963 T95 = KP500000000 * (T83 - T84);
Chris@82 1964 T96 = KP353553390 * (T7g - T7j);
Chris@82 1965 T97 = T95 + T96;
Chris@82 1966 T9t = T95 - T96;
Chris@82 1967 {
Chris@82 1968 E T8Z, T9p, T92, T9q, T8X, T90;
Chris@82 1969 T8X = KP707106781 * (T7V - T7U);
Chris@82 1970 T8Z = T8X + T8Y;
Chris@82 1971 T9p = T8Y - T8X;
Chris@82 1972 T90 = KP707106781 * (T7L - T7O);
Chris@82 1973 T92 = T90 + T91;
Chris@82 1974 T9q = T91 - T90;
Chris@82 1975 T93 = FNMS(KP277785116, T92, KP415734806 * T8Z);
Chris@82 1976 T9z = FMA(KP490392640, T9p, KP097545161 * T9q);
Chris@82 1977 T9f = FMA(KP277785116, T8Z, KP415734806 * T92);
Chris@82 1978 T9r = FNMS(KP490392640, T9q, KP097545161 * T9p);
Chris@82 1979 }
Chris@82 1980 {
Chris@82 1981 E T8S, T9m, T8V, T9n, T8Q, T8T;
Chris@82 1982 T8Q = KP707106781 * (T7C - T7B);
Chris@82 1983 T8S = T8Q + T8R;
Chris@82 1984 T9m = T8R - T8Q;
Chris@82 1985 T8T = KP707106781 * (T7s - T7v);
Chris@82 1986 T8V = T8T + T8U;
Chris@82 1987 T9n = T8U - T8T;
Chris@82 1988 T8W = FMA(KP415734806, T8S, KP277785116 * T8V);
Chris@82 1989 T9y = FNMS(KP490392640, T9m, KP097545161 * T9n);
Chris@82 1990 T9e = FNMS(KP277785116, T8S, KP415734806 * T8V);
Chris@82 1991 T9o = FMA(KP097545161, T9m, KP490392640 * T9n);
Chris@82 1992 }
Chris@82 1993 }
Chris@82 1994 {
Chris@82 1995 E T8P, T94, T9h, T9i;
Chris@82 1996 T8P = T8L + T8O;
Chris@82 1997 T94 = T8W + T93;
Chris@82 1998 Ip[WS(rs, 3)] = T8P + T94;
Chris@82 1999 Im[WS(rs, 12)] = T94 - T8P;
Chris@82 2000 T9h = T97 + T9a;
Chris@82 2001 T9i = T9e + T9f;
Chris@82 2002 Rm[WS(rs, 12)] = T9h - T9i;
Chris@82 2003 Rp[WS(rs, 3)] = T9h + T9i;
Chris@82 2004 }
Chris@82 2005 {
Chris@82 2006 E T9b, T9c, T9d, T9g;
Chris@82 2007 T9b = T97 - T9a;
Chris@82 2008 T9c = T93 - T8W;
Chris@82 2009 Rm[WS(rs, 4)] = T9b - T9c;
Chris@82 2010 Rp[WS(rs, 11)] = T9b + T9c;
Chris@82 2011 T9d = T8O - T8L;
Chris@82 2012 T9g = T9e - T9f;
Chris@82 2013 Ip[WS(rs, 11)] = T9d + T9g;
Chris@82 2014 Im[WS(rs, 4)] = T9g - T9d;
Chris@82 2015 }
Chris@82 2016 {
Chris@82 2017 E T9l, T9s, T9B, T9C;
Chris@82 2018 T9l = T9j + T9k;
Chris@82 2019 T9s = T9o + T9r;
Chris@82 2020 Ip[WS(rs, 7)] = T9l + T9s;
Chris@82 2021 Im[WS(rs, 8)] = T9s - T9l;
Chris@82 2022 T9B = T9t + T9u;
Chris@82 2023 T9C = T9y + T9z;
Chris@82 2024 Rm[WS(rs, 8)] = T9B - T9C;
Chris@82 2025 Rp[WS(rs, 7)] = T9B + T9C;
Chris@82 2026 }
Chris@82 2027 {
Chris@82 2028 E T9v, T9w, T9x, T9A;
Chris@82 2029 T9v = T9t - T9u;
Chris@82 2030 T9w = T9r - T9o;
Chris@82 2031 Rm[0] = T9v - T9w;
Chris@82 2032 Rp[WS(rs, 15)] = T9v + T9w;
Chris@82 2033 T9x = T9k - T9j;
Chris@82 2034 T9A = T9y - T9z;
Chris@82 2035 Ip[WS(rs, 15)] = T9x + T9A;
Chris@82 2036 Im[0] = T9A - T9x;
Chris@82 2037 }
Chris@82 2038 }
Chris@82 2039 }
Chris@82 2040 }
Chris@82 2041 }
Chris@82 2042 }
Chris@82 2043
/*
 * Twiddle-factor instruction table for this codelet; the descriptor
 * `desc` that follows stores a pointer to it.  The table holds four
 * TW_CEXP entries whose last field is 1, 3, 9 and 27, and ends with a
 * TW_NEXT entry.
 * NOTE(review): the powers-of-three indices look like the reduced
 * twiddle set implied by the -twiddle-log3 generator option quoted in
 * the file header; the meaning of the middle field is not visible
 * here -- confirm both against the tw_instr definition.
 */
Chris@82 2044 static const tw_instr twinstr[] = {
Chris@82 2045 {TW_CEXP, 1, 1},
Chris@82 2046 {TW_CEXP, 1, 3},
Chris@82 2047 {TW_CEXP, 1, 9},
Chris@82 2048 {TW_CEXP, 1, 27},
Chris@82 2049 {TW_NEXT, 1, 0}
Chris@82 2050 };
Chris@82 2051
/*
 * Codelet descriptor handed to the registration call further down.
 * It is initialized positionally with the literal 32, the codelet name
 * string, the twiddle table `twinstr`, the address of GENUS, and a
 * four-entry count {440, 188, 112, 0}.
 * NOTE(review): 32 is presumably the radix (it matches "-n 32" in the
 * generator command line) and the four numbers appear to be operation
 * counts (additions, multiplications, fused multiply/adds, other) --
 * confirm against the hc2c_desc definition.
 */
Chris@82 2052 static const hc2c_desc desc = { 32, "hc2cfdft2_32", twinstr, &GENUS, {440, 188, 112, 0} };
Chris@82 2053
/*
 * Registration entry point for the hc2cfdft2_32 codelet.  It passes the
 * planner `p`, the codelet function hc2cfdft2_32, the address of its
 * descriptor `desc` and the HC2C_VIA_DFT constant to X(khc2c_register).
 * Returns nothing.
 * NOTE(review): X() is presumably the library's symbol-prefixing macro,
 * and `p` is presumably expected to be a valid planner -- confirm with
 * the declarations in rdft/codelet-rdft.h.
 */
Chris@82 2054 void X(codelet_hc2cfdft2_32) (planner *p) {
Chris@82 2055 X(khc2c_register) (p, hc2cfdft2_32, &desc, HC2C_VIA_DFT);
Chris@82 2056 }
Chris@82 2057 #endif