annotate src/fftw-3.3.5/rdft/scalar/r2cb/hc2cb2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:51:43 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hc2cb2_32 -include hc2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 488 FP additions, 350 FP multiplications,
Chris@42 32 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
Chris@42 33 * 204 stack variables, 7 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cb.h"
Chris@42 36
/*
 * hc2cb2_32 -- machine-generated codelet (FMA variant; see the "Generated by"
 * line above for the genfft options: -n 32, -dif, -sign 1, -twiddle-log3).
 *
 * Parameters, as used by the code below:
 *   Rp, Ip, Rm, Im  data arrays, updated in place.  Each iteration loads the
 *                   16 elements at offsets 0 and WS(rs, 1) .. WS(rs, 15) of
 *                   every array and stores 16 results back to the same
 *                   offsets (64 loads + 64 stores = the 128 memory accesses
 *                   quoted in the header comment).
 *   W               twiddle table; eight values W[0] .. W[7] are read per
 *                   iteration.
 *   rs              stride handed to WS() to form the element offsets.
 *   mb, me          half-open iteration range: the loop runs m = mb .. me-1.
 *   ms              per-iteration pointer step: Rp and Ip move forward by ms,
 *                   Rm and Im move backward by ms.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are not
 * defined in the visible source (presumably supplied by the included headers).
 * NOTE(review): FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- confirm against the codelet headers.
 */
Chris@42 37 static void hc2cb2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/*
 * Constants.  Numerically: KP980785280 = cos(pi/16), KP198912367 = tan(pi/16),
 * KP831469612 = cos(3*pi/16), KP668178637 = tan(3*pi/16),
 * KP923879532 = cos(pi/8), KP414213562 = tan(pi/8),
 * KP707106781 = cos(pi/4) = sqrt(1/2).
 */
Chris@42 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 46 {
Chris@42 47 INT m;
/*
 * Before the first iteration W is advanced by (mb - 1) * 8; it then advances
 * by 8 per iteration, matching the eight twiddle values read from
 * W[0] .. W[7].  Rp/Ip step forward and Rm/Im step backward by ms.
 */
Chris@42 48 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
/*
 * Loop-scope temporaries: assigned inside the nested blocks below and
 * consumed by the final stores after those blocks have closed.
 */
Chris@42 49 E T5u, T6b, T6e, T5I, T66, T60, T5U, T5R, T67, T5L, T61, T5x, T5A, T5D, T5O;
Chris@42 50 E T62, T5V, T5P;
/*
 * Twiddle stage: load W[0] .. W[7] and form the derived products (T3a, T3d,
 * T3v, ... and the loop-scope T5u, T6b, ...) that later multiply the outputs.
 * No data array is read in this block.
 */
Chris@42 51 {
Chris@42 52 E T11, T14, T12, T37, T17, T1b, T39, T15, T7C, T8P, T8S, T7I, T98, T7e, T78;
Chris@42 53 E T8V, T3d, T3x, T3a, T3v, T9s, T3G, T4p, T5X, T16, T9m, T3y, T4b, T3C, T4g;
Chris@42 54 E T5Z, T1a, T4r, T3J, T2O, T1c, T4W, T4s, T3Y, T3K, T3l, T3e, T3i, T3q, T8K;
Chris@42 55 E T8E, T8m, T7S, T5k, T5e;
Chris@42 56 {
Chris@42 57 E T13, T3c, T38, T3F, T7B, T9l, T77, T7d, T9r, T7H;
Chris@42 58 T11 = W[2];
Chris@42 59 T14 = W[3];
Chris@42 60 T12 = W[4];
Chris@42 61 T37 = W[0];
Chris@42 62 T17 = W[6];
Chris@42 63 T1b = W[7];
Chris@42 64 T13 = T11 * T12;
Chris@42 65 T3c = T37 * T14;
Chris@42 66 T38 = T37 * T11;
Chris@42 67 T3F = T37 * T12;
Chris@42 68 T7B = T11 * T17;
Chris@42 69 T9l = T12 * T17;
Chris@42 70 T77 = T37 * T17;
Chris@42 71 T7d = T37 * T1b;
Chris@42 72 T9r = T12 * T1b;
Chris@42 73 T7H = T11 * T1b;
Chris@42 74 T39 = W[1];
Chris@42 75 T15 = W[5];
Chris@42 76 {
Chris@42 77 E T3I, T19, T5d, T3b, T18, T2N;
Chris@42 78 T7C = FMA(T14, T1b, T7B);
Chris@42 79 T8P = FNMS(T14, T1b, T7B);
Chris@42 80 T8S = FMA(T14, T17, T7H);
Chris@42 81 T7I = FNMS(T14, T17, T7H);
Chris@42 82 T98 = FNMS(T39, T17, T7d);
Chris@42 83 T7e = FMA(T39, T17, T7d);
Chris@42 84 T78 = FNMS(T39, T1b, T77);
Chris@42 85 T8V = FMA(T39, T1b, T77);
Chris@42 86 T3d = FMA(T39, T11, T3c);
Chris@42 87 T3x = FNMS(T39, T11, T3c);
Chris@42 88 T3a = FNMS(T39, T14, T38);
Chris@42 89 T3v = FMA(T39, T14, T38);
Chris@42 90 T9s = FNMS(T15, T17, T9r);
Chris@42 91 T3G = FNMS(T39, T15, T3F);
Chris@42 92 T4p = FMA(T39, T15, T3F);
Chris@42 93 T5X = FNMS(T14, T15, T13);
Chris@42 94 T16 = FMA(T14, T15, T13);
Chris@42 95 T3I = T37 * T15;
Chris@42 96 T19 = T11 * T15;
Chris@42 97 T5d = T3v * T12;
Chris@42 98 T3b = T3a * T12;
Chris@42 99 T9m = FMA(T15, T1b, T9l);
Chris@42 100 {
Chris@42 101 E T3w, T3B, T5t, T5H;
Chris@42 102 T3w = T3v * T17;
Chris@42 103 T3B = T3v * T1b;
Chris@42 104 T5t = T3a * T17;
Chris@42 105 T5H = T3a * T1b;
Chris@42 106 T3y = FNMS(T3x, T1b, T3w);
Chris@42 107 T4b = FMA(T3x, T1b, T3w);
Chris@42 108 T3C = FMA(T3x, T17, T3B);
Chris@42 109 T4g = FNMS(T3x, T17, T3B);
Chris@42 110 T5u = FMA(T3d, T1b, T5t);
Chris@42 111 T6b = FNMS(T3d, T1b, T5t);
Chris@42 112 T6e = FMA(T3d, T17, T5H);
Chris@42 113 T5I = FNMS(T3d, T17, T5H);
Chris@42 114 T18 = T16 * T17;
Chris@42 115 T2N = T16 * T1b;
Chris@42 116 T5Z = FMA(T14, T12, T19);
Chris@42 117 T1a = FNMS(T14, T12, T19);
Chris@42 118 }
Chris@42 119 {
Chris@42 120 E T3H, T3X, T4q, T4V, T5Y, T65;
Chris@42 121 T4q = T4p * T17;
Chris@42 122 T4V = T4p * T1b;
Chris@42 123 T4r = FNMS(T39, T12, T3I);
Chris@42 124 T3J = FMA(T39, T12, T3I);
Chris@42 125 T2O = FNMS(T1a, T17, T2N);
Chris@42 126 T1c = FMA(T1a, T1b, T18);
Chris@42 127 T3H = T3G * T17;
Chris@42 128 T4W = FNMS(T4r, T17, T4V);
Chris@42 129 T4s = FMA(T4r, T1b, T4q);
Chris@42 130 T3X = T3G * T1b;
Chris@42 131 T5Y = T5X * T17;
Chris@42 132 T65 = T5X * T1b;
Chris@42 133 T3Y = FNMS(T3J, T17, T3X);
Chris@42 134 T3K = FMA(T3J, T1b, T3H);
Chris@42 135 {
Chris@42 136 E T8J, T8D, T3h, T5j, T8l, T7R;
Chris@42 137 T3h = T3a * T15;
Chris@42 138 T66 = FNMS(T5Z, T17, T65);
Chris@42 139 T60 = FMA(T5Z, T1b, T5Y);
Chris@42 140 T3l = FNMS(T3d, T15, T3b);
Chris@42 141 T3e = FMA(T3d, T15, T3b);
Chris@42 142 T3i = FNMS(T3d, T12, T3h);
Chris@42 143 T3q = FMA(T3d, T12, T3h);
Chris@42 144 T8J = T3l * T1b;
Chris@42 145 T8D = T3l * T17;
Chris@42 146 T5j = T3v * T15;
Chris@42 147 T8l = T3e * T1b;
Chris@42 148 T7R = T3e * T17;
Chris@42 149 T8K = FNMS(T3q, T17, T8J);
Chris@42 150 T8E = FMA(T3q, T1b, T8D);
Chris@42 151 T8m = FNMS(T3i, T17, T8l);
Chris@42 152 T7S = FMA(T3i, T1b, T7R);
Chris@42 153 T5U = FNMS(T3x, T12, T5j);
Chris@42 154 T5k = FMA(T3x, T12, T5j);
Chris@42 155 T5e = FNMS(T3x, T15, T5d);
Chris@42 156 T5R = FMA(T3x, T15, T5d);
Chris@42 157 }
Chris@42 158 }
Chris@42 159 }
Chris@42 160 }
/*
 * Data stage: load the 64 inputs, combine them through sum/difference and
 * FMA/FNMS steps with the KP constants, then store each output scaled by the
 * twiddle-derived factors.  Loads, arithmetic and stores are interleaved in
 * the order emitted by the generator.
 */
Chris@42 161 {
Chris@42 162 E T6O, T6i, T7s, T7o, T6j, Tf, T8W, T7V, T99, T8p, T3L, T1t, T3Z, T2X, T5J;
Chris@42 163 E T4Z, T7t, T6W, T5v, T4v, TZ, T7x, T91, T9d, T28, T3S, T3R, T2h, T5B, T4Q;
Chris@42 164 E T8v, T8a, T5C, T4N, T6Z, T6J, TK, T7w, T3P, T2z, T9c, T94, T3O, T2I, T5y;
Chris@42 165 E T4J, T8u, T8h, T5z, T4G, T6Y, T6A, T6P, Tu, T9a, T82, T8X, T8s, T4y, T40;
Chris@42 166 E T1Q, T3M, T30, T4B, T5w, T52, T7u, T6q;
Chris@42 167 {
Chris@42 168 E T6B, T6I, T4M, T4L, T4t, T4u, T6s, T6z;
Chris@42 169 {
Chris@42 170 E T1d, T3, T6Q, T2S, T2P, T6, T6R, T1g, Td, T6U, T1i, Ta, T2V, T1r, T6T;
Chris@42 171 E T1l;
Chris@42 172 {
Chris@42 173 E T2Q, T2R, T4, T5, T1, T2, T1e, T1f;
Chris@42 174 T1 = Rp[0];
Chris@42 175 T2 = Rm[WS(rs, 15)];
Chris@42 176 {
Chris@42 177 E T6N, T6h, T7r, T7n;
Chris@42 178 T6N = T5R * T1b;
Chris@42 179 T6h = T5R * T17;
Chris@42 180 T7r = T5e * T1b;
Chris@42 181 T7n = T5e * T17;
Chris@42 182 T6O = FNMS(T5U, T17, T6N);
Chris@42 183 T6i = FMA(T5U, T1b, T6h);
Chris@42 184 T7s = FNMS(T5k, T17, T7r);
Chris@42 185 T7o = FMA(T5k, T1b, T7n);
Chris@42 186 T1d = T1 - T2;
Chris@42 187 T3 = T1 + T2;
Chris@42 188 }
Chris@42 189 T2Q = Ip[0];
Chris@42 190 T2R = Im[WS(rs, 15)];
Chris@42 191 T4 = Rp[WS(rs, 8)];
Chris@42 192 T5 = Rm[WS(rs, 7)];
Chris@42 193 T1e = Ip[WS(rs, 8)];
Chris@42 194 T6Q = T2Q - T2R;
Chris@42 195 T2S = T2Q + T2R;
Chris@42 196 T2P = T4 - T5;
Chris@42 197 T6 = T4 + T5;
Chris@42 198 T1f = Im[WS(rs, 7)];
Chris@42 199 {
Chris@42 200 E T1o, T1n, T1p, Tb, Tc;
Chris@42 201 Tb = Rm[WS(rs, 3)];
Chris@42 202 Tc = Rp[WS(rs, 12)];
Chris@42 203 T1o = Ip[WS(rs, 12)];
Chris@42 204 T6R = T1e - T1f;
Chris@42 205 T1g = T1e + T1f;
Chris@42 206 T1n = Tb - Tc;
Chris@42 207 Td = Tb + Tc;
Chris@42 208 T1p = Im[WS(rs, 3)];
Chris@42 209 {
Chris@42 210 E T1j, T1k, T8, T9, T1q;
Chris@42 211 T8 = Rp[WS(rs, 4)];
Chris@42 212 T9 = Rm[WS(rs, 11)];
Chris@42 213 T1q = T1o + T1p;
Chris@42 214 T6U = T1o - T1p;
Chris@42 215 T1j = Ip[WS(rs, 4)];
Chris@42 216 T1i = T8 - T9;
Chris@42 217 Ta = T8 + T9;
Chris@42 218 T1k = Im[WS(rs, 11)];
Chris@42 219 T2V = T1n + T1q;
Chris@42 220 T1r = T1n - T1q;
Chris@42 221 T6T = T1j - T1k;
Chris@42 222 T1l = T1j + T1k;
Chris@42 223 }
Chris@42 224 }
Chris@42 225 }
Chris@42 226 {
Chris@42 227 E T2U, T6V, T6S, T1h, T1s, T4Y, T4X, T2T, T2W;
Chris@42 228 {
Chris@42 229 E T7T, T8o, T1m, T7U, T7, Te, T8n;
Chris@42 230 T7T = T3 - T6;
Chris@42 231 T7 = T3 + T6;
Chris@42 232 Te = Ta + Td;
Chris@42 233 T8o = Ta - Td;
Chris@42 234 T1m = T1i - T1l;
Chris@42 235 T2U = T1i + T1l;
Chris@42 236 T6j = T7 - Te;
Chris@42 237 Tf = T7 + Te;
Chris@42 238 T7U = T6U - T6T;
Chris@42 239 T6V = T6T + T6U;
Chris@42 240 T6S = T6Q + T6R;
Chris@42 241 T8n = T6Q - T6R;
Chris@42 242 T4t = T1d + T1g;
Chris@42 243 T1h = T1d - T1g;
Chris@42 244 T8W = T7T + T7U;
Chris@42 245 T7V = T7T - T7U;
Chris@42 246 T99 = T8o + T8n;
Chris@42 247 T8p = T8n - T8o;
Chris@42 248 T1s = T1m + T1r;
Chris@42 249 T4Y = T1m - T1r;
Chris@42 250 }
Chris@42 251 T4X = T2S - T2P;
Chris@42 252 T2T = T2P + T2S;
Chris@42 253 T2W = T2U - T2V;
Chris@42 254 T4u = T2U + T2V;
Chris@42 255 T3L = FMA(KP707106781, T1s, T1h);
Chris@42 256 T1t = FNMS(KP707106781, T1s, T1h);
Chris@42 257 T3Z = FMA(KP707106781, T2W, T2T);
Chris@42 258 T2X = FNMS(KP707106781, T2W, T2T);
Chris@42 259 T5J = FNMS(KP707106781, T4Y, T4X);
Chris@42 260 T4Z = FMA(KP707106781, T4Y, T4X);
Chris@42 261 T7t = T6S + T6V;
Chris@42 262 T6W = T6S - T6V;
Chris@42 263 }
Chris@42 264 }
Chris@42 265 {
Chris@42 266 E T29, T1S, T1V, T87, TR, T2c, T84, T6E, TU, T23, T6F, T22, TX, T24, T2e;
Chris@42 267 E T21;
Chris@42 268 {
Chris@42 269 E TO, TN, TP, TL, TM;
Chris@42 270 TL = Rm[0];
Chris@42 271 TM = Rp[WS(rs, 15)];
Chris@42 272 TO = Rp[WS(rs, 7)];
Chris@42 273 T5v = FMA(KP707106781, T4u, T4t);
Chris@42 274 T4v = FNMS(KP707106781, T4u, T4t);
Chris@42 275 TN = TL + TM;
Chris@42 276 T29 = TL - TM;
Chris@42 277 TP = Rm[WS(rs, 8)];
Chris@42 278 {
Chris@42 279 E T6C, T6D, T1X, T20;
Chris@42 280 {
Chris@42 281 E T2a, T2b, T1T, T1U, TQ;
Chris@42 282 T1T = Ip[WS(rs, 15)];
Chris@42 283 T1U = Im[0];
Chris@42 284 TQ = TO + TP;
Chris@42 285 T1S = TO - TP;
Chris@42 286 T2a = Ip[WS(rs, 7)];
Chris@42 287 T6C = T1T - T1U;
Chris@42 288 T1V = T1T + T1U;
Chris@42 289 T2b = Im[WS(rs, 8)];
Chris@42 290 T87 = TN - TQ;
Chris@42 291 TR = TN + TQ;
Chris@42 292 T2c = T2a + T2b;
Chris@42 293 T6D = T2a - T2b;
Chris@42 294 }
Chris@42 295 {
Chris@42 296 E T1Y, T1Z, TS, TT, TV, TW;
Chris@42 297 TS = Rp[WS(rs, 3)];
Chris@42 298 TT = Rm[WS(rs, 12)];
Chris@42 299 T84 = T6C - T6D;
Chris@42 300 T6E = T6C + T6D;
Chris@42 301 T1Y = Ip[WS(rs, 3)];
Chris@42 302 T1X = TS - TT;
Chris@42 303 TU = TS + TT;
Chris@42 304 T1Z = Im[WS(rs, 12)];
Chris@42 305 TV = Rm[WS(rs, 4)];
Chris@42 306 TW = Rp[WS(rs, 11)];
Chris@42 307 T23 = Ip[WS(rs, 11)];
Chris@42 308 T6F = T1Y - T1Z;
Chris@42 309 T20 = T1Y + T1Z;
Chris@42 310 T22 = TV - TW;
Chris@42 311 TX = TV + TW;
Chris@42 312 T24 = Im[WS(rs, 4)];
Chris@42 313 }
Chris@42 314 T2e = T1X - T20;
Chris@42 315 T21 = T1X + T20;
Chris@42 316 }
Chris@42 317 }
Chris@42 318 {
Chris@42 319 E TY, T85, T25, T6G;
Chris@42 320 TY = TU + TX;
Chris@42 321 T85 = TU - TX;
Chris@42 322 T25 = T23 + T24;
Chris@42 323 T6G = T23 - T24;
Chris@42 324 {
Chris@42 325 E T4O, T1W, T2f, T8Z, T86, T89, T90, T27, T88, T26, T6H, T4P, T2d, T2g;
Chris@42 326 T4O = T1S + T1V;
Chris@42 327 T1W = T1S - T1V;
Chris@42 328 TZ = TR + TY;
Chris@42 329 T6B = TR - TY;
Chris@42 330 T88 = T6G - T6F;
Chris@42 331 T6H = T6F + T6G;
Chris@42 332 T26 = T22 + T25;
Chris@42 333 T2f = T22 - T25;
Chris@42 334 T6I = T6E - T6H;
Chris@42 335 T7x = T6E + T6H;
Chris@42 336 T8Z = T85 + T84;
Chris@42 337 T86 = T84 - T85;
Chris@42 338 T89 = T87 - T88;
Chris@42 339 T90 = T87 + T88;
Chris@42 340 T27 = T21 - T26;
Chris@42 341 T4M = T21 + T26;
Chris@42 342 T4L = T29 + T2c;
Chris@42 343 T2d = T29 - T2c;
Chris@42 344 T2g = T2e + T2f;
Chris@42 345 T4P = T2e - T2f;
Chris@42 346 T91 = FNMS(KP414213562, T90, T8Z);
Chris@42 347 T9d = FMA(KP414213562, T8Z, T90);
Chris@42 348 T28 = FNMS(KP707106781, T27, T1W);
Chris@42 349 T3S = FMA(KP707106781, T27, T1W);
Chris@42 350 T3R = FMA(KP707106781, T2g, T2d);
Chris@42 351 T2h = FNMS(KP707106781, T2g, T2d);
Chris@42 352 T5B = FMA(KP707106781, T4P, T4O);
Chris@42 353 T4Q = FNMS(KP707106781, T4P, T4O);
Chris@42 354 T8v = FNMS(KP414213562, T86, T89);
Chris@42 355 T8a = FMA(KP414213562, T89, T86);
Chris@42 356 }
Chris@42 357 }
Chris@42 358 }
Chris@42 359 {
Chris@42 360 E T2A, T2j, TC, T8e, T2m, T2D, T6v, T8b, TF, T6w, T2F, T2s, T2t, TI, T6x;
Chris@42 361 E T2w, TJ, T8c;
Chris@42 362 {
Chris@42 363 E Tw, Tx, Tz, TA, T6t, T6u;
Chris@42 364 Tw = Rp[WS(rs, 1)];
Chris@42 365 T5C = FMA(KP707106781, T4M, T4L);
Chris@42 366 T4N = FNMS(KP707106781, T4M, T4L);
Chris@42 367 T6Z = T6I - T6B;
Chris@42 368 T6J = T6B + T6I;
Chris@42 369 Tx = Rm[WS(rs, 14)];
Chris@42 370 Tz = Rp[WS(rs, 9)];
Chris@42 371 TA = Rm[WS(rs, 6)];
Chris@42 372 {
Chris@42 373 E T2k, Ty, TB, T2l, T2B, T2C;
Chris@42 374 T2k = Ip[WS(rs, 1)];
Chris@42 375 T2A = Tw - Tx;
Chris@42 376 Ty = Tw + Tx;
Chris@42 377 T2j = Tz - TA;
Chris@42 378 TB = Tz + TA;
Chris@42 379 T2l = Im[WS(rs, 14)];
Chris@42 380 T2B = Ip[WS(rs, 9)];
Chris@42 381 T2C = Im[WS(rs, 6)];
Chris@42 382 TC = Ty + TB;
Chris@42 383 T8e = Ty - TB;
Chris@42 384 T2m = T2k + T2l;
Chris@42 385 T6t = T2k - T2l;
Chris@42 386 T6u = T2B - T2C;
Chris@42 387 T2D = T2B + T2C;
Chris@42 388 }
Chris@42 389 {
Chris@42 390 E TG, T2o, T2r, TH, T2u, T2v;
Chris@42 391 {
Chris@42 392 E TD, TE, T2p, T2q;
Chris@42 393 TD = Rp[WS(rs, 5)];
Chris@42 394 T6v = T6t + T6u;
Chris@42 395 T8b = T6t - T6u;
Chris@42 396 TE = Rm[WS(rs, 10)];
Chris@42 397 T2p = Ip[WS(rs, 5)];
Chris@42 398 T2q = Im[WS(rs, 10)];
Chris@42 399 TG = Rm[WS(rs, 2)];
Chris@42 400 T2o = TD - TE;
Chris@42 401 TF = TD + TE;
Chris@42 402 T6w = T2p - T2q;
Chris@42 403 T2r = T2p + T2q;
Chris@42 404 TH = Rp[WS(rs, 13)];
Chris@42 405 T2u = Ip[WS(rs, 13)];
Chris@42 406 T2v = Im[WS(rs, 2)];
Chris@42 407 }
Chris@42 408 T2F = T2o - T2r;
Chris@42 409 T2s = T2o + T2r;
Chris@42 410 T2t = TG - TH;
Chris@42 411 TI = TG + TH;
Chris@42 412 T6x = T2u - T2v;
Chris@42 413 T2w = T2u + T2v;
Chris@42 414 }
Chris@42 415 }
Chris@42 416 TJ = TF + TI;
Chris@42 417 T8c = TF - TI;
Chris@42 418 {
Chris@42 419 E T8f, T6y, T2x, T2G;
Chris@42 420 T8f = T6x - T6w;
Chris@42 421 T6y = T6w + T6x;
Chris@42 422 T2x = T2t + T2w;
Chris@42 423 T2G = T2t - T2w;
Chris@42 424 {
Chris@42 425 E T4H, T2n, T2y, T4F, T8d, T92, T93, T8g;
Chris@42 426 T6s = TC - TJ;
Chris@42 427 TK = TC + TJ;
Chris@42 428 T7w = T6v + T6y;
Chris@42 429 T6z = T6v - T6y;
Chris@42 430 T4H = T2m - T2j;
Chris@42 431 T2n = T2j + T2m;
Chris@42 432 T2y = T2s - T2x;
Chris@42 433 T4F = T2s + T2x;
Chris@42 434 T8d = T8b - T8c;
Chris@42 435 T92 = T8c + T8b;
Chris@42 436 T93 = T8e + T8f;
Chris@42 437 T8g = T8e - T8f;
Chris@42 438 {
Chris@42 439 E T4E, T2E, T2H, T4I;
Chris@42 440 T4E = T2A + T2D;
Chris@42 441 T2E = T2A - T2D;
Chris@42 442 T3P = FMA(KP707106781, T2y, T2n);
Chris@42 443 T2z = FNMS(KP707106781, T2y, T2n);
Chris@42 444 T9c = FNMS(KP414213562, T92, T93);
Chris@42 445 T94 = FMA(KP414213562, T93, T92);
Chris@42 446 T2H = T2F + T2G;
Chris@42 447 T4I = T2G - T2F;
Chris@42 448 T3O = FMA(KP707106781, T2H, T2E);
Chris@42 449 T2I = FNMS(KP707106781, T2H, T2E);
Chris@42 450 T5y = FMA(KP707106781, T4I, T4H);
Chris@42 451 T4J = FNMS(KP707106781, T4I, T4H);
Chris@42 452 T8u = FMA(KP414213562, T8d, T8g);
Chris@42 453 T8h = FNMS(KP414213562, T8g, T8d);
Chris@42 454 T5z = FMA(KP707106781, T4F, T4E);
Chris@42 455 T4G = FNMS(KP707106781, T4F, T4E);
Chris@42 456 }
Chris@42 457 }
Chris@42 458 }
Chris@42 459 }
Chris@42 460 {
Chris@42 461 E T4w, T1J, T7Z, Tm, T6p, T80, T4x, T1O, T1z, Tp, T1A, T6k, T1x, T1u, Ts;
Chris@42 462 E T1B;
Chris@42 463 {
Chris@42 464 E T1K, Ti, T1L, T6n, T1I, T1F, Tl, T1M;
Chris@42 465 {
Chris@42 466 E T1G, T1H, Tg, Th, Tj, Tk;
Chris@42 467 Tg = Rp[WS(rs, 2)];
Chris@42 468 Th = Rm[WS(rs, 13)];
Chris@42 469 T1G = Ip[WS(rs, 2)];
Chris@42 470 T6Y = T6s + T6z;
Chris@42 471 T6A = T6s - T6z;
Chris@42 472 T1K = Tg - Th;
Chris@42 473 Ti = Tg + Th;
Chris@42 474 T1H = Im[WS(rs, 13)];
Chris@42 475 Tj = Rp[WS(rs, 10)];
Chris@42 476 Tk = Rm[WS(rs, 5)];
Chris@42 477 T1L = Ip[WS(rs, 10)];
Chris@42 478 T6n = T1G - T1H;
Chris@42 479 T1I = T1G + T1H;
Chris@42 480 T1F = Tj - Tk;
Chris@42 481 Tl = Tj + Tk;
Chris@42 482 T1M = Im[WS(rs, 5)];
Chris@42 483 }
Chris@42 484 {
Chris@42 485 E T1v, T1w, Tq, Tr;
Chris@42 486 {
Chris@42 487 E Tn, T1N, T6o, To;
Chris@42 488 Tn = Rm[WS(rs, 1)];
Chris@42 489 T4w = T1I - T1F;
Chris@42 490 T1J = T1F + T1I;
Chris@42 491 T7Z = Ti - Tl;
Chris@42 492 Tm = Ti + Tl;
Chris@42 493 T1N = T1L + T1M;
Chris@42 494 T6o = T1L - T1M;
Chris@42 495 To = Rp[WS(rs, 14)];
Chris@42 496 T1v = Ip[WS(rs, 14)];
Chris@42 497 T6p = T6n + T6o;
Chris@42 498 T80 = T6n - T6o;
Chris@42 499 T4x = T1K + T1N;
Chris@42 500 T1O = T1K - T1N;
Chris@42 501 T1z = Tn - To;
Chris@42 502 Tp = Tn + To;
Chris@42 503 T1w = Im[WS(rs, 1)];
Chris@42 504 }
Chris@42 505 Tq = Rp[WS(rs, 6)];
Chris@42 506 Tr = Rm[WS(rs, 9)];
Chris@42 507 T1A = Ip[WS(rs, 6)];
Chris@42 508 T6k = T1v - T1w;
Chris@42 509 T1x = T1v + T1w;
Chris@42 510 T1u = Tq - Tr;
Chris@42 511 Ts = Tq + Tr;
Chris@42 512 T1B = Im[WS(rs, 9)];
Chris@42 513 }
Chris@42 514 }
Chris@42 515 {
Chris@42 516 E T4z, T6m, T4A, T2Z, T1E, T1P, T2Y, T50, T51;
Chris@42 517 {
Chris@42 518 E T1y, T81, T8q, T1D, T7Y, T8r;
Chris@42 519 {
Chris@42 520 E T7X, Tt, T1C, T6l, T7W;
Chris@42 521 T4z = T1u + T1x;
Chris@42 522 T1y = T1u - T1x;
Chris@42 523 T7X = Tp - Ts;
Chris@42 524 Tt = Tp + Ts;
Chris@42 525 T1C = T1A + T1B;
Chris@42 526 T6l = T1A - T1B;
Chris@42 527 T81 = T7Z + T80;
Chris@42 528 T8q = T7Z - T80;
Chris@42 529 T6m = T6k + T6l;
Chris@42 530 T7W = T6k - T6l;
Chris@42 531 T4A = T1z + T1C;
Chris@42 532 T1D = T1z - T1C;
Chris@42 533 T6P = Tm - Tt;
Chris@42 534 Tu = Tm + Tt;
Chris@42 535 T7Y = T7W - T7X;
Chris@42 536 T8r = T7X + T7W;
Chris@42 537 }
Chris@42 538 T2Z = FMA(KP414213562, T1y, T1D);
Chris@42 539 T1E = FNMS(KP414213562, T1D, T1y);
Chris@42 540 T9a = T81 + T7Y;
Chris@42 541 T82 = T7Y - T81;
Chris@42 542 T8X = T8q + T8r;
Chris@42 543 T8s = T8q - T8r;
Chris@42 544 T1P = FMA(KP414213562, T1O, T1J);
Chris@42 545 T2Y = FNMS(KP414213562, T1J, T1O);
Chris@42 546 }
Chris@42 547 T4y = FNMS(KP414213562, T4x, T4w);
Chris@42 548 T50 = FMA(KP414213562, T4w, T4x);
Chris@42 549 T40 = T1P + T1E;
Chris@42 550 T1Q = T1E - T1P;
Chris@42 551 T3M = T2Y + T2Z;
Chris@42 552 T30 = T2Y - T2Z;
Chris@42 553 T51 = FMA(KP414213562, T4z, T4A);
Chris@42 554 T4B = FNMS(KP414213562, T4A, T4z);
Chris@42 555 T5w = T50 + T51;
Chris@42 556 T52 = T50 - T51;
Chris@42 557 T7u = T6p + T6m;
Chris@42 558 T6q = T6m - T6p;
Chris@42 559 }
Chris@42 560 }
Chris@42 561 }
Chris@42 562 {
Chris@42 563 E T7D, T7K, T7J, T5K, T4C, T7E, T83, T8w, T8t, T8i, T6r, T70, T6X, T6K;
Chris@42 564 {
Chris@42 565 E T8Y, T9e, T9b, T95, T8F, T8G, T8L, T8M;
Chris@42 566 {
Chris@42 567 E T7v, T7p, T7y, Tv, T10;
Chris@42 568 T7D = Tf - Tu;
Chris@42 569 Tv = Tf + Tu;
Chris@42 570 T10 = TK + TZ;
Chris@42 571 T7K = TK - TZ;
Chris@42 572 T7J = T7t - T7u;
Chris@42 573 T7v = T7t + T7u;
Chris@42 574 T5K = T4B - T4y;
Chris@42 575 T4C = T4y + T4B;
Chris@42 576 T7p = Tv - T10;
Chris@42 577 T7E = T7x - T7w;
Chris@42 578 T7y = T7w + T7x;
/*
 * Rp[0] (and Rm[0] just below) are stored as plain sums; every other output
 * is stored through an FMA/FNMS pair with twiddle-derived factors.
 */
Chris@42 579 Rp[0] = Tv + T10;
Chris@42 580 {
Chris@42 581 E T9p, T9x, T9z, T9v;
Chris@42 582 {
Chris@42 583 E T9n, T7A, T7q, T7z, T9o, T9t, T9u;
Chris@42 584 T8Y = FNMS(KP707106781, T8X, T8W);
Chris@42 585 T9n = FMA(KP707106781, T8X, T8W);
Chris@42 586 T7A = T7s * T7p;
Chris@42 587 T7q = T7o * T7p;
Chris@42 588 Rm[0] = T7v + T7y;
Chris@42 589 T7z = T7v - T7y;
Chris@42 590 T9o = T9c + T9d;
Chris@42 591 T9e = T9c - T9d;
Chris@42 592 T9b = FNMS(KP707106781, T9a, T99);
Chris@42 593 T9t = FMA(KP707106781, T9a, T99);
Chris@42 594 T9u = T94 + T91;
Chris@42 595 T95 = T91 - T94;
Chris@42 596 Rm[WS(rs, 8)] = FMA(T7o, T7z, T7A);
Chris@42 597 Rp[WS(rs, 8)] = FNMS(T7s, T7z, T7q);
Chris@42 598 T9p = FNMS(KP923879532, T9o, T9n);
Chris@42 599 T9x = FMA(KP923879532, T9o, T9n);
Chris@42 600 T9z = FMA(KP923879532, T9u, T9t);
Chris@42 601 T9v = FNMS(KP923879532, T9u, T9t);
Chris@42 602 }
Chris@42 603 {
Chris@42 604 E T9y, T9q, T9w, T9A;
Chris@42 605 T9y = T3v * T9x;
Chris@42 606 T9q = T9m * T9p;
Chris@42 607 T9w = T9m * T9v;
Chris@42 608 T9A = T3v * T9z;
Chris@42 609 Rp[WS(rs, 1)] = FNMS(T3x, T9z, T9y);
Chris@42 610 Rp[WS(rs, 9)] = FNMS(T9s, T9v, T9q);
Chris@42 611 Rm[WS(rs, 9)] = FMA(T9s, T9p, T9w);
Chris@42 612 Rm[WS(rs, 1)] = FMA(T3x, T9x, T9A);
Chris@42 613 }
Chris@42 614 }
Chris@42 615 T83 = FMA(KP707106781, T82, T7V);
Chris@42 616 T8F = FNMS(KP707106781, T82, T7V);
Chris@42 617 T8G = T8u + T8v;
Chris@42 618 T8w = T8u - T8v;
Chris@42 619 T8t = FMA(KP707106781, T8s, T8p);
Chris@42 620 T8L = FNMS(KP707106781, T8s, T8p);
Chris@42 621 T8M = T8h + T8a;
Chris@42 622 T8i = T8a - T8h;
Chris@42 623 }
Chris@42 624 {
Chris@42 625 E T79, T7a, T7f, T7g;
Chris@42 626 T6r = T6j + T6q;
Chris@42 627 T79 = T6j - T6q;
Chris@42 628 {
Chris@42 629 E T8Q, T8H, T8T, T8N;
Chris@42 630 T8Q = FMA(KP923879532, T8G, T8F);
Chris@42 631 T8H = FNMS(KP923879532, T8G, T8F);
Chris@42 632 T8T = FMA(KP923879532, T8M, T8L);
Chris@42 633 T8N = FNMS(KP923879532, T8M, T8L);
Chris@42 634 {
Chris@42 635 E T8R, T8I, T8U, T8O;
Chris@42 636 T8R = T8P * T8Q;
Chris@42 637 T8I = T8E * T8H;
Chris@42 638 T8U = T8P * T8T;
Chris@42 639 T8O = T8E * T8N;
Chris@42 640 Rp[WS(rs, 15)] = FNMS(T8S, T8T, T8R);
Chris@42 641 Rp[WS(rs, 7)] = FNMS(T8K, T8N, T8I);
Chris@42 642 Rm[WS(rs, 15)] = FMA(T8S, T8Q, T8U);
Chris@42 643 Rm[WS(rs, 7)] = FMA(T8K, T8H, T8O);
Chris@42 644 T7a = T6Z - T6Y;
Chris@42 645 T70 = T6Y + T6Z;
Chris@42 646 }
Chris@42 647 }
Chris@42 648 T6X = T6P + T6W;
Chris@42 649 T7f = T6W - T6P;
Chris@42 650 T7g = T6A - T6J;
Chris@42 651 T6K = T6A + T6J;
Chris@42 652 {
Chris@42 653 E T7j, T7b, T7l, T7h;
Chris@42 654 T7j = FMA(KP707106781, T7a, T79);
Chris@42 655 T7b = FNMS(KP707106781, T7a, T79);
Chris@42 656 T7l = FMA(KP707106781, T7g, T7f);
Chris@42 657 T7h = FNMS(KP707106781, T7g, T7f);
Chris@42 658 {
Chris@42 659 E T7k, T7c, T7m, T7i;
Chris@42 660 T7k = T5X * T7j;
Chris@42 661 T7c = T78 * T7b;
Chris@42 662 T7m = T5X * T7l;
Chris@42 663 T7i = T78 * T7h;
Chris@42 664 Rp[WS(rs, 6)] = FNMS(T5Z, T7l, T7k);
Chris@42 665 Rp[WS(rs, 14)] = FNMS(T7e, T7h, T7c);
Chris@42 666 Rm[WS(rs, 6)] = FMA(T5Z, T7j, T7m);
Chris@42 667 Rm[WS(rs, 14)] = FMA(T7e, T7b, T7i);
Chris@42 668 }
Chris@42 669 }
Chris@42 670 {
Chris@42 671 E T9h, T96, T9j, T9f;
Chris@42 672 T9h = FMA(KP923879532, T95, T8Y);
Chris@42 673 T96 = FNMS(KP923879532, T95, T8Y);
Chris@42 674 T9j = FMA(KP923879532, T9e, T9b);
Chris@42 675 T9f = FNMS(KP923879532, T9e, T9b);
Chris@42 676 {
Chris@42 677 E T9k, T9i, T9g, T97;
Chris@42 678 T9k = T3J * T9h;
Chris@42 679 T9i = T3G * T9h;
Chris@42 680 T9g = T98 * T96;
Chris@42 681 T97 = T8V * T96;
Chris@42 682 Rm[WS(rs, 5)] = FMA(T3G, T9j, T9k);
Chris@42 683 Rp[WS(rs, 5)] = FNMS(T3J, T9j, T9i);
Chris@42 684 Rm[WS(rs, 13)] = FMA(T8V, T9f, T9g);
Chris@42 685 Rp[WS(rs, 13)] = FNMS(T98, T9f, T97);
Chris@42 686 }
Chris@42 687 }
Chris@42 688 }
Chris@42 689 }
Chris@42 690 {
Chris@42 691 E T31, T3r, T1R, T3m, T33, T32, T3s, T2K, T8z, T8j;
Chris@42 692 {
Chris@42 693 E T73, T6L, T75, T71;
Chris@42 694 T73 = FMA(KP707106781, T6K, T6r);
Chris@42 695 T6L = FNMS(KP707106781, T6K, T6r);
Chris@42 696 T75 = FMA(KP707106781, T70, T6X);
Chris@42 697 T71 = FNMS(KP707106781, T70, T6X);
Chris@42 698 {
Chris@42 699 E T76, T74, T72, T6M;
Chris@42 700 T76 = T3d * T73;
Chris@42 701 T74 = T3a * T73;
Chris@42 702 T72 = T6O * T6L;
Chris@42 703 T6M = T6i * T6L;
Chris@42 704 Rm[WS(rs, 2)] = FMA(T3a, T75, T76);
Chris@42 705 Rp[WS(rs, 2)] = FNMS(T3d, T75, T74);
Chris@42 706 Rm[WS(rs, 10)] = FMA(T6i, T71, T72);
Chris@42 707 Rp[WS(rs, 10)] = FNMS(T6O, T71, T6M);
Chris@42 708 }
Chris@42 709 }
Chris@42 710 {
Chris@42 711 E T7N, T7F, T7P, T7L;
Chris@42 712 T7N = T7D + T7E;
Chris@42 713 T7F = T7D - T7E;
Chris@42 714 T7P = T7K + T7J;
Chris@42 715 T7L = T7J - T7K;
Chris@42 716 {
Chris@42 717 E T7O, T7G, T7Q, T7M;
Chris@42 718 T7O = T4p * T7N;
Chris@42 719 T7G = T7C * T7F;
Chris@42 720 T7Q = T4p * T7P;
Chris@42 721 T7M = T7C * T7L;
Chris@42 722 Rp[WS(rs, 4)] = FNMS(T4r, T7P, T7O);
Chris@42 723 Rp[WS(rs, 12)] = FNMS(T7I, T7L, T7G);
Chris@42 724 Rm[WS(rs, 4)] = FMA(T4r, T7N, T7Q);
Chris@42 725 Rm[WS(rs, 12)] = FMA(T7I, T7F, T7M);
Chris@42 726 }
Chris@42 727 }
Chris@42 728 T31 = FMA(KP923879532, T30, T2X);
Chris@42 729 T3r = FNMS(KP923879532, T30, T2X);
Chris@42 730 T8z = FMA(KP923879532, T8i, T83);
Chris@42 731 T8j = FNMS(KP923879532, T8i, T83);
Chris@42 732 {
Chris@42 733 E T8B, T8x, T8C, T8A;
Chris@42 734 T8B = FMA(KP923879532, T8w, T8t);
Chris@42 735 T8x = FNMS(KP923879532, T8w, T8t);
Chris@42 736 T8C = T1a * T8z;
Chris@42 737 T8A = T16 * T8z;
Chris@42 738 {
Chris@42 739 E T8y, T8k, T2i, T2J;
Chris@42 740 T8y = T8m * T8j;
Chris@42 741 T8k = T7S * T8j;
Chris@42 742 Rm[WS(rs, 3)] = FMA(T16, T8B, T8C);
Chris@42 743 Rp[WS(rs, 3)] = FNMS(T1a, T8B, T8A);
Chris@42 744 Rm[WS(rs, 11)] = FMA(T7S, T8x, T8y);
Chris@42 745 Rp[WS(rs, 11)] = FNMS(T8m, T8x, T8k);
Chris@42 746 T1R = FMA(KP923879532, T1Q, T1t);
Chris@42 747 T3m = FNMS(KP923879532, T1Q, T1t);
Chris@42 748 T33 = FNMS(KP668178637, T28, T2h);
Chris@42 749 T2i = FMA(KP668178637, T2h, T28);
Chris@42 750 T2J = FNMS(KP668178637, T2I, T2z);
Chris@42 751 T32 = FMA(KP668178637, T2z, T2I);
Chris@42 752 T3s = T2J + T2i;
Chris@42 753 T2K = T2i - T2J;
Chris@42 754 }
Chris@42 755 }
Chris@42 756 {
Chris@42 757 E T5l, T53, T5f, T4D, T4K, T4R, T56, T5g;
Chris@42 758 T5l = FNMS(KP923879532, T52, T4Z);
Chris@42 759 T53 = FMA(KP923879532, T52, T4Z);
Chris@42 760 {
Chris@42 761 E T3t, T3D, T3f, T2L;
Chris@42 762 T3t = FNMS(KP831469612, T3s, T3r);
Chris@42 763 T3D = FMA(KP831469612, T3s, T3r);
Chris@42 764 T3f = FMA(KP831469612, T2K, T1R);
Chris@42 765 T2L = FNMS(KP831469612, T2K, T1R);
Chris@42 766 {
Chris@42 767 E T3n, T34, T3g, T2M;
Chris@42 768 T3n = T32 + T33;
Chris@42 769 T34 = T32 - T33;
Chris@42 770 T3g = T3e * T3f;
Chris@42 771 T2M = T1c * T2L;
Chris@42 772 {
Chris@42 773 E T3o, T3z, T3j, T35;
Chris@42 774 T3o = FNMS(KP831469612, T3n, T3m);
Chris@42 775 T3z = FMA(KP831469612, T3n, T3m);
Chris@42 776 T3j = FMA(KP831469612, T34, T31);
Chris@42 777 T35 = FNMS(KP831469612, T34, T31);
Chris@42 778 {
Chris@42 779 E T3u, T3p, T3E, T3A;
Chris@42 780 T3u = T3q * T3o;
Chris@42 781 T3p = T3l * T3o;
Chris@42 782 T3E = T3C * T3z;
Chris@42 783 T3A = T3y * T3z;
Chris@42 784 {
Chris@42 785 E T3k, T36, T54, T55;
Chris@42 786 T3k = T3e * T3j;
Chris@42 787 Ip[WS(rs, 2)] = FNMS(T3i, T3j, T3g);
Chris@42 788 T36 = T1c * T35;
Chris@42 789 Ip[WS(rs, 10)] = FNMS(T2O, T35, T2M);
Chris@42 790 Im[WS(rs, 6)] = FMA(T3l, T3t, T3u);
Chris@42 791 Ip[WS(rs, 6)] = FNMS(T3q, T3t, T3p);
Chris@42 792 Im[WS(rs, 14)] = FMA(T3y, T3D, T3E);
Chris@42 793 Ip[WS(rs, 14)] = FNMS(T3C, T3D, T3A);
Chris@42 794 Im[WS(rs, 2)] = FMA(T3i, T3f, T3k);
Chris@42 795 Im[WS(rs, 10)] = FMA(T2O, T2L, T36);
Chris@42 796 T5f = FMA(KP923879532, T4C, T4v);
Chris@42 797 T4D = FNMS(KP923879532, T4C, T4v);
Chris@42 798 T4K = FNMS(KP668178637, T4J, T4G);
Chris@42 799 T54 = FMA(KP668178637, T4G, T4J);
Chris@42 800 T55 = FMA(KP668178637, T4N, T4Q);
Chris@42 801 T4R = FNMS(KP668178637, T4Q, T4N);
Chris@42 802 T56 = T54 - T55;
Chris@42 803 T5g = T54 + T55;
Chris@42 804 }
Chris@42 805 }
Chris@42 806 }
Chris@42 807 }
Chris@42 808 }
Chris@42 809 {
Chris@42 810 E T4h, T41, T4c, T3N, T3Q, T3T, T44, T4d;
Chris@42 811 T4h = FNMS(KP923879532, T40, T3Z);
Chris@42 812 T41 = FMA(KP923879532, T40, T3Z);
Chris@42 813 {
Chris@42 814 E T57, T5b, T5h, T5p;
Chris@42 815 T57 = FNMS(KP831469612, T56, T53);
Chris@42 816 T5b = FMA(KP831469612, T56, T53);
Chris@42 817 T5h = FNMS(KP831469612, T5g, T5f);
Chris@42 818 T5p = FMA(KP831469612, T5g, T5f);
Chris@42 819 {
Chris@42 820 E T5m, T4S, T5i, T5q;
Chris@42 821 T5m = T4K - T4R;
Chris@42 822 T4S = T4K + T4R;
Chris@42 823 T5i = T5e * T5h;
Chris@42 824 T5q = T17 * T5p;
Chris@42 825 {
Chris@42 826 E T5n, T5r, T59, T4T;
Chris@42 827 T5n = FMA(KP831469612, T5m, T5l);
Chris@42 828 T5r = FNMS(KP831469612, T5m, T5l);
Chris@42 829 T59 = FMA(KP831469612, T4S, T4D);
Chris@42 830 T4T = FNMS(KP831469612, T4S, T4D);
Chris@42 831 {
Chris@42 832 E T5o, T5s, T5c, T5a;
Chris@42 833 T5o = T5e * T5n;
Chris@42 834 Ip[WS(rs, 5)] = FNMS(T5k, T5n, T5i);
Chris@42 835 T5s = T17 * T5r;
Chris@42 836 Ip[WS(rs, 13)] = FNMS(T1b, T5r, T5q);
Chris@42 837 T5c = T14 * T59;
Chris@42 838 T5a = T11 * T59;
Chris@42 839 {
Chris@42 840 E T58, T4U, T42, T43;
Chris@42 841 T58 = T4W * T4T;
Chris@42 842 T4U = T4s * T4T;
Chris@42 843 Im[WS(rs, 5)] = FMA(T5k, T5h, T5o);
Chris@42 844 Im[WS(rs, 13)] = FMA(T1b, T5p, T5s);
Chris@42 845 Im[WS(rs, 1)] = FMA(T11, T5b, T5c);
Chris@42 846 Ip[WS(rs, 1)] = FNMS(T14, T5b, T5a);
Chris@42 847 Im[WS(rs, 9)] = FMA(T4s, T57, T58);
Chris@42 848 Ip[WS(rs, 9)] = FNMS(T4W, T57, T4U);
Chris@42 849 T4c = FNMS(KP923879532, T3M, T3L);
Chris@42 850 T3N = FMA(KP923879532, T3M, T3L);
Chris@42 851 T3Q = FNMS(KP198912367, T3P, T3O);
Chris@42 852 T42 = FMA(KP198912367, T3O, T3P);
Chris@42 853 T43 = FNMS(KP198912367, T3R, T3S);
Chris@42 854 T3T = FMA(KP198912367, T3S, T3R);
Chris@42 855 T44 = T42 + T43;
Chris@42 856 T4d = T43 - T42;
Chris@42 857 }
Chris@42 858 }
Chris@42 859 }
Chris@42 860 }
Chris@42 861 }
Chris@42 862 T67 = FNMS(KP923879532, T5K, T5J);
Chris@42 863 T5L = FMA(KP923879532, T5K, T5J);
Chris@42 864 {
Chris@42 865 E T45, T49, T4e, T4l;
Chris@42 866 T45 = FNMS(KP980785280, T44, T41);
Chris@42 867 T49 = FMA(KP980785280, T44, T41);
Chris@42 868 T4e = FNMS(KP980785280, T4d, T4c);
Chris@42 869 T4l = FMA(KP980785280, T4d, T4c);
Chris@42 870 {
Chris@42 871 E T4i, T3U, T4f, T4m;
Chris@42 872 T4i = T3Q - T3T;
Chris@42 873 T3U = T3Q + T3T;
Chris@42 874 T4f = T4b * T4e;
Chris@42 875 T4m = T12 * T4l;
Chris@42 876 {
Chris@42 877 E T4j, T4n, T47, T3V;
Chris@42 878 T4j = FNMS(KP980785280, T4i, T4h);
Chris@42 879 T4n = FMA(KP980785280, T4i, T4h);
Chris@42 880 T47 = FMA(KP980785280, T3U, T3N);
Chris@42 881 T3V = FNMS(KP980785280, T3U, T3N);
Chris@42 882 {
Chris@42 883 E T4k, T4o, T4a, T48;
Chris@42 884 T4k = T4b * T4j;
Chris@42 885 Ip[WS(rs, 12)] = FNMS(T4g, T4j, T4f);
Chris@42 886 T4o = T12 * T4n;
Chris@42 887 Ip[WS(rs, 4)] = FNMS(T15, T4n, T4m);
Chris@42 888 T4a = T39 * T47;
Chris@42 889 T48 = T37 * T47;
Chris@42 890 {
Chris@42 891 E T46, T3W, T5M, T5N;
Chris@42 892 T46 = T3Y * T3V;
Chris@42 893 T3W = T3K * T3V;
Chris@42 894 Im[WS(rs, 12)] = FMA(T4g, T4e, T4k);
Chris@42 895 Im[WS(rs, 4)] = FMA(T15, T4l, T4o);
Chris@42 896 Im[0] = FMA(T37, T49, T4a);
Chris@42 897 Ip[0] = FNMS(T39, T49, T48);
Chris@42 898 Im[WS(rs, 8)] = FMA(T3K, T45, T46);
Chris@42 899 Ip[WS(rs, 8)] = FNMS(T3Y, T45, T3W);
Chris@42 900 T61 = FMA(KP923879532, T5w, T5v);
Chris@42 901 T5x = FNMS(KP923879532, T5w, T5v);
Chris@42 902 T5A = FNMS(KP198912367, T5z, T5y);
Chris@42 903 T5M = FMA(KP198912367, T5y, T5z);
Chris@42 904 T5N = FMA(KP198912367, T5B, T5C);
Chris@42 905 T5D = FNMS(KP198912367, T5C, T5B);
Chris@42 906 T5O = T5M - T5N;
Chris@42 907 T62 = T5M + T5N;
Chris@42 908 }
Chris@42 909 }
Chris@42 910 }
Chris@42 911 }
Chris@42 912 }
Chris@42 913 }
Chris@42 914 }
Chris@42 915 }
Chris@42 916 }
Chris@42 917 }
Chris@42 918 }
/*
 * Remaining outputs (Ip/Im at offsets 3, 7, 11 and 15): these consume only
 * the temporaries declared at loop-body scope.
 */
Chris@42 919 T5V = FMA(KP980785280, T5O, T5L);
Chris@42 920 T5P = FNMS(KP980785280, T5O, T5L);
Chris@42 921 {
Chris@42 922 E T6c, T63, T5E, T68;
Chris@42 923 T6c = FMA(KP980785280, T62, T61);
Chris@42 924 T63 = FNMS(KP980785280, T62, T61);
Chris@42 925 T5E = T5A + T5D;
Chris@42 926 T68 = T5D - T5A;
Chris@42 927 {
Chris@42 928 E T64, T6d, T6f, T69;
Chris@42 929 T64 = T60 * T63;
Chris@42 930 T6d = T6b * T6c;
Chris@42 931 T6f = FNMS(KP980785280, T68, T67);
Chris@42 932 T69 = FMA(KP980785280, T68, T67);
Chris@42 933 {
Chris@42 934 E T5F, T5S, T6a, T6g;
Chris@42 935 T5F = FMA(KP980785280, T5E, T5x);
Chris@42 936 T5S = FNMS(KP980785280, T5E, T5x);
Chris@42 937 T6a = T60 * T69;
Chris@42 938 Ip[WS(rs, 7)] = FNMS(T66, T69, T64);
Chris@42 939 T6g = T6b * T6f;
Chris@42 940 Ip[WS(rs, 15)] = FNMS(T6e, T6f, T6d);
Chris@42 941 {
Chris@42 942 E T5W, T5T, T5Q, T5G;
Chris@42 943 T5W = T5U * T5S;
Chris@42 944 T5T = T5R * T5S;
Chris@42 945 T5Q = T5I * T5F;
Chris@42 946 T5G = T5u * T5F;
Chris@42 947 Im[WS(rs, 7)] = FMA(T66, T63, T6a);
Chris@42 948 Im[WS(rs, 15)] = FMA(T6e, T6c, T6g);
Chris@42 949 Im[WS(rs, 3)] = FMA(T5R, T5V, T5W);
Chris@42 950 Ip[WS(rs, 3)] = FNMS(T5U, T5V, T5T);
Chris@42 951 Im[WS(rs, 11)] = FMA(T5u, T5P, T5Q);
Chris@42 952 Ip[WS(rs, 11)] = FNMS(T5I, T5P, T5G);
Chris@42 953 }
Chris@42 954 }
Chris@42 955 }
Chris@42 956 }
Chris@42 957 }
Chris@42 958 }
Chris@42 959 }
Chris@42 960
/*
 * Twiddle instruction table for the HAVE_FMA build of this codelet.
 * Four TW_CEXP entries whose last field is 1, 3, 9 and 27, followed by a
 * TW_NEXT terminator.
 * NOTE(review): 1, 3, 9, 27 are presumably the twiddle exponents selected by
 * the -twiddle-log3 generator flag; confirm against the tw_instr definition.
 */
Chris@42 961 static const tw_instr twinstr[] = {
Chris@42 962 {TW_CEXP, 1, 1},
Chris@42 963 {TW_CEXP, 1, 3},
Chris@42 964 {TW_CEXP, 1, 9},
Chris@42 965 {TW_CEXP, 1, 27},
Chris@42 966 {TW_NEXT, 1, 0}
Chris@42 967 };
Chris@42 968
/*
 * Codelet descriptor for the HAVE_FMA build: 32, the name string
 * "hc2cb2_32", the twiddle table above, &GENUS, and the four counts
 * {236, 98, 252, 0}. The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in this variant's
 * generated header comment.
 */
Chris@42 969 static const hc2c_desc desc = { 32, "hc2cb2_32", twinstr, &GENUS, {236, 98, 252, 0} };
Chris@42 970
/*
 * Registration entry point (HAVE_FMA build): hands the kernel function
 * hc2cb2_32 and its descriptor to X(khc2c_register) on planner p, tagged
 * with HC2C_VIA_RDFT.
 */
Chris@42 971 void X(codelet_hc2cb2_32) (planner *p) {
Chris@42 972 X(khc2c_register) (p, hc2cb2_32, &desc, HC2C_VIA_RDFT);
Chris@42 973 }
Chris@42 974 #else /* HAVE_FMA */
Chris@42 975
Chris@42 976 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hc2cb2_32 -include hc2cb.h */
Chris@42 977
Chris@42 978 /*
Chris@42 979 * This function contains 488 FP additions, 280 FP multiplications,
Chris@42 980 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
Chris@42 981 * 160 stack variables, 7 constants, and 128 memory accesses
Chris@42 982 */
Chris@42 983 #include "hc2cb.h"
Chris@42 984
/*
 * hc2cb2_32, variant compiled when HAVE_FMA is not defined.
 *
 * Parameters as used by the body:
 *   Rp, Ip, Rm, Im - four R arrays that are both read and overwritten;
 *                    elements are addressed as X[WS(rs, k)] (or X[0]).
 *   W              - read-only twiddle array; eight values W[0..7] are
 *                    consumed per loop iteration.
 *   rs             - stride argument passed to WS() for element indexing.
 *   mb, me         - the loop runs for m = mb while m < me.
 *   ms             - per-iteration pointer step: Rp and Ip move by +ms,
 *                    Rm and Im move by -ms.
 *
 * Within one iteration every load from Rp/Ip/Rm/Im is performed before the
 * first store, so the update is done in place on the same arrays.
 *
 * NOTE(review): FMA and FNMS are macros defined outside this file; they are
 * presumably FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm in
 * the codelet headers before relying on that reading.
 */
Chris@42 985 static void hc2cb2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 986 {
/*
 * Numeric constants. By value: KP707106781 = sqrt(1/2);
 * KP923879532 / KP382683432 = cos / sin of pi/8;
 * KP980785280 / KP195090322 = cos / sin of pi/16;
 * KP831469612 / KP555570233 = cos / sin of 3*pi/16.
 */
Chris@42 987 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 988 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 989 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 990 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 991 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 992 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 993 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 994 {
Chris@42 995 INT m;
/*
 * W is first advanced by (mb - 1) * 8 entries, then by 8 per iteration.
 * Rp/Ip step forward by ms while Rm/Im step backward by ms.
 */
Chris@42 996 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(128, rs)) {
Chris@42 997 E T11, T14, T12, T15, T17, T2z, T2B, T1c, T18, T1d, T1g, T1k, T2F, T2L, T3t;
Chris@42 998 E T4H, T3h, T3V, T3b, T4v, T4T, T4X, T6t, T71, T6z, T75, T81, T8x, T8f, T8z;
Chris@42 999 E T2R, T2V, T8p, T8t, T4r, T4t, T53, T69, T3n, T3r, T7P, T7T, T4P, T4R, T6F;
Chris@42 1000 E T6R, T1f, T2X, T1j, T2Y, T1l, T31, T2d, T2Z, T49, T4h, T4c, T4i, T4d, T4n;
Chris@42 1001 E T4f, T4j;
/*
 * Twiddle setup. The eight stored values W[0..7] are loaded into
 * T11, T14, T12, T15, T18, T1d, T1g, T1k and then combined by products,
 * sums and differences into the remaining multiplier pairs that the
 * store section below applies to the outputs.
 */
Chris@42 1002 {
Chris@42 1003 E T2P, T3q, T2U, T3l, T2Q, T3p, T2T, T3m, T2D, T3g, T2K, T39, T2E, T3f, T2J;
Chris@42 1004 E T3a;
Chris@42 1005 {
Chris@42 1006 E T13, T1b, T16, T1a;
Chris@42 1007 T11 = W[0];
Chris@42 1008 T14 = W[1];
Chris@42 1009 T12 = W[2];
Chris@42 1010 T15 = W[3];
Chris@42 1011 T13 = T11 * T12;
Chris@42 1012 T1b = T14 * T12;
Chris@42 1013 T16 = T14 * T15;
Chris@42 1014 T1a = T11 * T15;
Chris@42 1015 T17 = T13 + T16;
Chris@42 1016 T2z = T13 - T16;
Chris@42 1017 T2B = T1a + T1b;
Chris@42 1018 T1c = T1a - T1b;
Chris@42 1019 T18 = W[4];
Chris@42 1020 T2P = T12 * T18;
Chris@42 1021 T3q = T14 * T18;
Chris@42 1022 T2U = T15 * T18;
Chris@42 1023 T3l = T11 * T18;
Chris@42 1024 T1d = W[5];
Chris@42 1025 T2Q = T15 * T1d;
Chris@42 1026 T3p = T11 * T1d;
Chris@42 1027 T2T = T12 * T1d;
Chris@42 1028 T3m = T14 * T1d;
Chris@42 1029 T1g = W[6];
Chris@42 1030 T2D = T11 * T1g;
Chris@42 1031 T3g = T15 * T1g;
Chris@42 1032 T2K = T14 * T1g;
Chris@42 1033 T39 = T12 * T1g;
Chris@42 1034 T1k = W[7];
Chris@42 1035 T2E = T14 * T1k;
Chris@42 1036 T3f = T12 * T1k;
Chris@42 1037 T2J = T11 * T1k;
Chris@42 1038 T3a = T15 * T1k;
Chris@42 1039 }
Chris@42 1040 T2F = T2D - T2E;
Chris@42 1041 T2L = T2J + T2K;
Chris@42 1042 T3t = T39 - T3a;
Chris@42 1043 T4H = T2J - T2K;
Chris@42 1044 T3h = T3f - T3g;
Chris@42 1045 T3V = T3f + T3g;
Chris@42 1046 T3b = T39 + T3a;
Chris@42 1047 T4v = T2D + T2E;
Chris@42 1048 T4T = FMA(T18, T1g, T1d * T1k);
Chris@42 1049 T4X = FNMS(T1d, T1g, T18 * T1k);
Chris@42 1050 {
Chris@42 1051 E T6r, T6s, T6x, T6y;
Chris@42 1052 T6r = T17 * T1g;
Chris@42 1053 T6s = T1c * T1k;
Chris@42 1054 T6t = T6r - T6s;
Chris@42 1055 T71 = T6r + T6s;
Chris@42 1056 T6x = T17 * T1k;
Chris@42 1057 T6y = T1c * T1g;
Chris@42 1058 T6z = T6x + T6y;
Chris@42 1059 T75 = T6x - T6y;
Chris@42 1060 }
Chris@42 1061 {
Chris@42 1062 E T7Z, T80, T8d, T8e;
Chris@42 1063 T7Z = T2z * T1g;
Chris@42 1064 T80 = T2B * T1k;
Chris@42 1065 T81 = T7Z + T80;
Chris@42 1066 T8x = T7Z - T80;
Chris@42 1067 T8d = T2z * T1k;
Chris@42 1068 T8e = T2B * T1g;
Chris@42 1069 T8f = T8d - T8e;
Chris@42 1070 T8z = T8d + T8e;
Chris@42 1071 T2R = T2P - T2Q;
Chris@42 1072 T2V = T2T + T2U;
Chris@42 1073 T8p = FMA(T2R, T1g, T2V * T1k);
Chris@42 1074 T8t = FNMS(T2V, T1g, T2R * T1k);
Chris@42 1075 }
Chris@42 1076 T4r = T2P + T2Q;
Chris@42 1077 T4t = T2T - T2U;
Chris@42 1078 T53 = FMA(T4r, T1g, T4t * T1k);
Chris@42 1079 T69 = FNMS(T4t, T1g, T4r * T1k);
Chris@42 1080 T3n = T3l + T3m;
Chris@42 1081 T3r = T3p - T3q;
Chris@42 1082 T7P = FMA(T3n, T1g, T3r * T1k);
Chris@42 1083 T7T = FNMS(T3r, T1g, T3n * T1k);
Chris@42 1084 T4P = T3l - T3m;
Chris@42 1085 T4R = T3p + T3q;
Chris@42 1086 T6F = FMA(T4P, T1g, T4R * T1k);
Chris@42 1087 T6R = FNMS(T4R, T1g, T4P * T1k);
Chris@42 1088 {
Chris@42 1089 E T19, T1e, T1h, T1i;
Chris@42 1090 T19 = T17 * T18;
Chris@42 1091 T1e = T1c * T1d;
Chris@42 1092 T1f = T19 + T1e;
Chris@42 1093 T2X = T19 - T1e;
Chris@42 1094 T1h = T17 * T1d;
Chris@42 1095 T1i = T1c * T18;
Chris@42 1096 T1j = T1h - T1i;
Chris@42 1097 T2Y = T1h + T1i;
Chris@42 1098 }
Chris@42 1099 T1l = FMA(T1f, T1g, T1j * T1k);
Chris@42 1100 T31 = FNMS(T2Y, T1g, T2X * T1k);
Chris@42 1101 T2d = FNMS(T1j, T1g, T1f * T1k);
Chris@42 1102 T2Z = FMA(T2X, T1g, T2Y * T1k);
Chris@42 1103 {
Chris@42 1104 E T47, T48, T4a, T4b;
Chris@42 1105 T47 = T2z * T18;
Chris@42 1106 T48 = T2B * T1d;
Chris@42 1107 T49 = T47 - T48;
Chris@42 1108 T4h = T47 + T48;
Chris@42 1109 T4a = T2z * T1d;
Chris@42 1110 T4b = T2B * T18;
Chris@42 1111 T4c = T4a + T4b;
Chris@42 1112 T4i = T4a - T4b;
Chris@42 1113 }
Chris@42 1114 T4d = FMA(T49, T1g, T4c * T1k);
Chris@42 1115 T4n = FNMS(T4i, T1g, T4h * T1k);
Chris@42 1116 T4f = FNMS(T4c, T1g, T49 * T1k);
Chris@42 1117 T4j = FMA(T4h, T1g, T4i * T1k);
Chris@42 1118 }
/*
 * Load phase. All reads of Rp/Ip/Rm/Im for this iteration happen in the
 * next four scopes. Inputs are taken in pairs Rp[WS(rs,k)] with
 * Rm[WS(rs,15-k)] and Ip[WS(rs,k)] with Im[WS(rs,15-k)]; each pair is
 * reduced to its sum and its difference, and those are combined further
 * into the temporaries declared here.
 */
Chris@42 1119 {
Chris@42 1120 E T56, T7b, T7C, T6c, Tf, T1m, T6f, T7c, T3Y, T4I, T2t, T32, T5d, T7D, T3w;
Chris@42 1121 E T4w, Tu, T2e, T7g, T7F, T7j, T7G, T1B, T33, T3z, T40, T5l, T6i, T5s, T6h;
Chris@42 1122 E T3C, T3Z, TK, T1D, T7v, T86, T7y, T85, T1S, T35, T3O, T4C, T5F, T6J, T5M;
Chris@42 1123 E T6K, T3R, T4D, TZ, T1U, T7o, T89, T7r, T88, T29, T36, T3H, T4z, T5Y, T6M;
Chris@42 1124 E T65, T6N, T3K, T4A;
/* Inputs Rp/Ip at offsets 0, 8, 4, 12 paired with Rm/Im at 15, 7, 11, 3. */
Chris@42 1125 {
Chris@42 1126 E T3, T54, T2h, T6b, T6, T6a, T2k, T55, Ta, T57, T2o, T58, Td, T5a, T2r;
Chris@42 1127 E T5b;
Chris@42 1128 {
Chris@42 1129 E T1, T2, T2f, T2g;
Chris@42 1130 T1 = Rp[0];
Chris@42 1131 T2 = Rm[WS(rs, 15)];
Chris@42 1132 T3 = T1 + T2;
Chris@42 1133 T54 = T1 - T2;
Chris@42 1134 T2f = Ip[0];
Chris@42 1135 T2g = Im[WS(rs, 15)];
Chris@42 1136 T2h = T2f - T2g;
Chris@42 1137 T6b = T2f + T2g;
Chris@42 1138 }
Chris@42 1139 {
Chris@42 1140 E T4, T5, T2i, T2j;
Chris@42 1141 T4 = Rp[WS(rs, 8)];
Chris@42 1142 T5 = Rm[WS(rs, 7)];
Chris@42 1143 T6 = T4 + T5;
Chris@42 1144 T6a = T4 - T5;
Chris@42 1145 T2i = Ip[WS(rs, 8)];
Chris@42 1146 T2j = Im[WS(rs, 7)];
Chris@42 1147 T2k = T2i - T2j;
Chris@42 1148 T55 = T2i + T2j;
Chris@42 1149 }
Chris@42 1150 {
Chris@42 1151 E T8, T9, T2m, T2n;
Chris@42 1152 T8 = Rp[WS(rs, 4)];
Chris@42 1153 T9 = Rm[WS(rs, 11)];
Chris@42 1154 Ta = T8 + T9;
Chris@42 1155 T57 = T8 - T9;
Chris@42 1156 T2m = Ip[WS(rs, 4)];
Chris@42 1157 T2n = Im[WS(rs, 11)];
Chris@42 1158 T2o = T2m - T2n;
Chris@42 1159 T58 = T2m + T2n;
Chris@42 1160 }
Chris@42 1161 {
Chris@42 1162 E Tb, Tc, T2p, T2q;
Chris@42 1163 Tb = Rm[WS(rs, 3)];
Chris@42 1164 Tc = Rp[WS(rs, 12)];
Chris@42 1165 Td = Tb + Tc;
Chris@42 1166 T5a = Tb - Tc;
Chris@42 1167 T2p = Ip[WS(rs, 12)];
Chris@42 1168 T2q = Im[WS(rs, 3)];
Chris@42 1169 T2r = T2p - T2q;
Chris@42 1170 T5b = T2p + T2q;
Chris@42 1171 }
Chris@42 1172 {
Chris@42 1173 E T7, Te, T2l, T2s;
Chris@42 1174 T56 = T54 - T55;
Chris@42 1175 T7b = T54 + T55;
Chris@42 1176 T7C = T6b - T6a;
Chris@42 1177 T6c = T6a + T6b;
Chris@42 1178 T7 = T3 + T6;
Chris@42 1179 Te = Ta + Td;
Chris@42 1180 Tf = T7 + Te;
Chris@42 1181 T1m = T7 - Te;
Chris@42 1182 {
Chris@42 1183 E T6d, T6e, T3W, T3X;
Chris@42 1184 T6d = T57 + T58;
Chris@42 1185 T6e = T5a + T5b;
Chris@42 1186 T6f = KP707106781 * (T6d - T6e);
Chris@42 1187 T7c = KP707106781 * (T6d + T6e);
Chris@42 1188 T3W = T2h - T2k;
Chris@42 1189 T3X = Ta - Td;
Chris@42 1190 T3Y = T3W - T3X;
Chris@42 1191 T4I = T3X + T3W;
Chris@42 1192 }
Chris@42 1193 T2l = T2h + T2k;
Chris@42 1194 T2s = T2o + T2r;
Chris@42 1195 T2t = T2l - T2s;
Chris@42 1196 T32 = T2l + T2s;
Chris@42 1197 {
Chris@42 1198 E T59, T5c, T3u, T3v;
Chris@42 1199 T59 = T57 - T58;
Chris@42 1200 T5c = T5a - T5b;
Chris@42 1201 T5d = KP707106781 * (T59 + T5c);
Chris@42 1202 T7D = KP707106781 * (T59 - T5c);
Chris@42 1203 T3u = T3 - T6;
Chris@42 1204 T3v = T2r - T2o;
Chris@42 1205 T3w = T3u - T3v;
Chris@42 1206 T4w = T3u + T3v;
Chris@42 1207 }
Chris@42 1208 }
Chris@42 1209 }
/* Inputs Rp/Ip at offsets 2, 10, 14, 6 paired with Rm/Im at 13, 5, 1, 9. */
Chris@42 1210 {
Chris@42 1211 E Ti, T5p, T1w, T5n, Tl, T5m, T1z, T5q, Tp, T5i, T1p, T5g, Ts, T5f, T1s;
Chris@42 1212 E T5j;
Chris@42 1213 {
Chris@42 1214 E Tg, Th, T1u, T1v;
Chris@42 1215 Tg = Rp[WS(rs, 2)];
Chris@42 1216 Th = Rm[WS(rs, 13)];
Chris@42 1217 Ti = Tg + Th;
Chris@42 1218 T5p = Tg - Th;
Chris@42 1219 T1u = Ip[WS(rs, 2)];
Chris@42 1220 T1v = Im[WS(rs, 13)];
Chris@42 1221 T1w = T1u - T1v;
Chris@42 1222 T5n = T1u + T1v;
Chris@42 1223 }
Chris@42 1224 {
Chris@42 1225 E Tj, Tk, T1x, T1y;
Chris@42 1226 Tj = Rp[WS(rs, 10)];
Chris@42 1227 Tk = Rm[WS(rs, 5)];
Chris@42 1228 Tl = Tj + Tk;
Chris@42 1229 T5m = Tj - Tk;
Chris@42 1230 T1x = Ip[WS(rs, 10)];
Chris@42 1231 T1y = Im[WS(rs, 5)];
Chris@42 1232 T1z = T1x - T1y;
Chris@42 1233 T5q = T1x + T1y;
Chris@42 1234 }
Chris@42 1235 {
Chris@42 1236 E Tn, To, T1n, T1o;
Chris@42 1237 Tn = Rm[WS(rs, 1)];
Chris@42 1238 To = Rp[WS(rs, 14)];
Chris@42 1239 Tp = Tn + To;
Chris@42 1240 T5i = Tn - To;
Chris@42 1241 T1n = Ip[WS(rs, 14)];
Chris@42 1242 T1o = Im[WS(rs, 1)];
Chris@42 1243 T1p = T1n - T1o;
Chris@42 1244 T5g = T1n + T1o;
Chris@42 1245 }
Chris@42 1246 {
Chris@42 1247 E Tq, Tr, T1q, T1r;
Chris@42 1248 Tq = Rp[WS(rs, 6)];
Chris@42 1249 Tr = Rm[WS(rs, 9)];
Chris@42 1250 Ts = Tq + Tr;
Chris@42 1251 T5f = Tq - Tr;
Chris@42 1252 T1q = Ip[WS(rs, 6)];
Chris@42 1253 T1r = Im[WS(rs, 9)];
Chris@42 1254 T1s = T1q - T1r;
Chris@42 1255 T5j = T1q + T1r;
Chris@42 1256 }
Chris@42 1257 {
Chris@42 1258 E Tm, Tt, T7e, T7f;
Chris@42 1259 Tm = Ti + Tl;
Chris@42 1260 Tt = Tp + Ts;
Chris@42 1261 Tu = Tm + Tt;
Chris@42 1262 T2e = Tm - Tt;
Chris@42 1263 T7e = T5p + T5q;
Chris@42 1264 T7f = T5n - T5m;
Chris@42 1265 T7g = FNMS(KP923879532, T7f, KP382683432 * T7e);
Chris@42 1266 T7F = FMA(KP382683432, T7f, KP923879532 * T7e);
Chris@42 1267 }
Chris@42 1268 {
Chris@42 1269 E T7h, T7i, T1t, T1A;
Chris@42 1270 T7h = T5i + T5j;
Chris@42 1271 T7i = T5f + T5g;
Chris@42 1272 T7j = FNMS(KP923879532, T7i, KP382683432 * T7h);
Chris@42 1273 T7G = FMA(KP382683432, T7i, KP923879532 * T7h);
Chris@42 1274 T1t = T1p + T1s;
Chris@42 1275 T1A = T1w + T1z;
Chris@42 1276 T1B = T1t - T1A;
Chris@42 1277 T33 = T1A + T1t;
Chris@42 1278 }
Chris@42 1279 {
Chris@42 1280 E T3x, T3y, T5h, T5k;
Chris@42 1281 T3x = T1p - T1s;
Chris@42 1282 T3y = Tp - Ts;
Chris@42 1283 T3z = T3x - T3y;
Chris@42 1284 T40 = T3y + T3x;
Chris@42 1285 T5h = T5f - T5g;
Chris@42 1286 T5k = T5i - T5j;
Chris@42 1287 T5l = FNMS(KP382683432, T5k, KP923879532 * T5h);
Chris@42 1288 T6i = FMA(KP382683432, T5h, KP923879532 * T5k);
Chris@42 1289 }
Chris@42 1290 {
Chris@42 1291 E T5o, T5r, T3A, T3B;
Chris@42 1292 T5o = T5m + T5n;
Chris@42 1293 T5r = T5p - T5q;
Chris@42 1294 T5s = FMA(KP923879532, T5o, KP382683432 * T5r);
Chris@42 1295 T6h = FNMS(KP382683432, T5o, KP923879532 * T5r);
Chris@42 1296 T3A = Ti - Tl;
Chris@42 1297 T3B = T1w - T1z;
Chris@42 1298 T3C = T3A + T3B;
Chris@42 1299 T3Z = T3A - T3B;
Chris@42 1300 }
Chris@42 1301 }
/* Inputs Rp/Ip at offsets 1, 9, 13, 5 paired with Rm/Im at 14, 6, 2, 10. */
Chris@42 1302 {
Chris@42 1303 E Ty, T5v, T1G, T5H, TB, T5G, T1J, T5w, TI, T5K, T1Q, T5D, TF, T5J, T1N;
Chris@42 1304 E T5A;
Chris@42 1305 {
Chris@42 1306 E Tw, Tx, T1H, T1I;
Chris@42 1307 Tw = Rp[WS(rs, 1)];
Chris@42 1308 Tx = Rm[WS(rs, 14)];
Chris@42 1309 Ty = Tw + Tx;
Chris@42 1310 T5v = Tw - Tx;
Chris@42 1311 {
Chris@42 1312 E T1E, T1F, Tz, TA;
Chris@42 1313 T1E = Ip[WS(rs, 1)];
Chris@42 1314 T1F = Im[WS(rs, 14)];
Chris@42 1315 T1G = T1E - T1F;
Chris@42 1316 T5H = T1E + T1F;
Chris@42 1317 Tz = Rp[WS(rs, 9)];
Chris@42 1318 TA = Rm[WS(rs, 6)];
Chris@42 1319 TB = Tz + TA;
Chris@42 1320 T5G = Tz - TA;
Chris@42 1321 }
Chris@42 1322 T1H = Ip[WS(rs, 9)];
Chris@42 1323 T1I = Im[WS(rs, 6)];
Chris@42 1324 T1J = T1H - T1I;
Chris@42 1325 T5w = T1H + T1I;
Chris@42 1326 {
Chris@42 1327 E TG, TH, T5B, T1O, T1P, T5C;
Chris@42 1328 TG = Rm[WS(rs, 2)];
Chris@42 1329 TH = Rp[WS(rs, 13)];
Chris@42 1330 T5B = TG - TH;
Chris@42 1331 T1O = Ip[WS(rs, 13)];
Chris@42 1332 T1P = Im[WS(rs, 2)];
Chris@42 1333 T5C = T1O + T1P;
Chris@42 1334 TI = TG + TH;
Chris@42 1335 T5K = T5B + T5C;
Chris@42 1336 T1Q = T1O - T1P;
Chris@42 1337 T5D = T5B - T5C;
Chris@42 1338 }
Chris@42 1339 {
Chris@42 1340 E TD, TE, T5y, T1L, T1M, T5z;
Chris@42 1341 TD = Rp[WS(rs, 5)];
Chris@42 1342 TE = Rm[WS(rs, 10)];
Chris@42 1343 T5y = TD - TE;
Chris@42 1344 T1L = Ip[WS(rs, 5)];
Chris@42 1345 T1M = Im[WS(rs, 10)];
Chris@42 1346 T5z = T1L + T1M;
Chris@42 1347 TF = TD + TE;
Chris@42 1348 T5J = T5y + T5z;
Chris@42 1349 T1N = T1L - T1M;
Chris@42 1350 T5A = T5y - T5z;
Chris@42 1351 }
Chris@42 1352 }
Chris@42 1353 {
Chris@42 1354 E TC, TJ, T7t, T7u;
Chris@42 1355 TC = Ty + TB;
Chris@42 1356 TJ = TF + TI;
Chris@42 1357 TK = TC + TJ;
Chris@42 1358 T1D = TC - TJ;
Chris@42 1359 T7t = T5H - T5G;
Chris@42 1360 T7u = KP707106781 * (T5A - T5D);
Chris@42 1361 T7v = T7t + T7u;
Chris@42 1362 T86 = T7t - T7u;
Chris@42 1363 }
Chris@42 1364 {
Chris@42 1365 E T7w, T7x, T1K, T1R;
Chris@42 1366 T7w = T5v + T5w;
Chris@42 1367 T7x = KP707106781 * (T5J + T5K);
Chris@42 1368 T7y = T7w - T7x;
Chris@42 1369 T85 = T7w + T7x;
Chris@42 1370 T1K = T1G + T1J;
Chris@42 1371 T1R = T1N + T1Q;
Chris@42 1372 T1S = T1K - T1R;
Chris@42 1373 T35 = T1K + T1R;
Chris@42 1374 }
Chris@42 1375 {
Chris@42 1376 E T3M, T3N, T5x, T5E;
Chris@42 1377 T3M = T1G - T1J;
Chris@42 1378 T3N = TF - TI;
Chris@42 1379 T3O = T3M - T3N;
Chris@42 1380 T4C = T3N + T3M;
Chris@42 1381 T5x = T5v - T5w;
Chris@42 1382 T5E = KP707106781 * (T5A + T5D);
Chris@42 1383 T5F = T5x - T5E;
Chris@42 1384 T6J = T5x + T5E;
Chris@42 1385 }
Chris@42 1386 {
Chris@42 1387 E T5I, T5L, T3P, T3Q;
Chris@42 1388 T5I = T5G + T5H;
Chris@42 1389 T5L = KP707106781 * (T5J - T5K);
Chris@42 1390 T5M = T5I - T5L;
Chris@42 1391 T6K = T5I + T5L;
Chris@42 1392 T3P = Ty - TB;
Chris@42 1393 T3Q = T1Q - T1N;
Chris@42 1394 T3R = T3P - T3Q;
Chris@42 1395 T4D = T3P + T3Q;
Chris@42 1396 }
Chris@42 1397 }
/* Inputs Rp/Ip at offsets 15, 7, 11, 3 paired with Rm/Im at 0, 8, 4, 12. */
Chris@42 1398 {
Chris@42 1399 E TN, T5O, T1X, T60, TQ, T5Z, T20, T5P, TX, T63, T27, T5W, TU, T62, T24;
Chris@42 1400 E T5T;
Chris@42 1401 {
Chris@42 1402 E TL, TM, T1Y, T1Z;
Chris@42 1403 TL = Rm[0];
Chris@42 1404 TM = Rp[WS(rs, 15)];
Chris@42 1405 TN = TL + TM;
Chris@42 1406 T5O = TL - TM;
Chris@42 1407 {
Chris@42 1408 E T1V, T1W, TO, TP;
Chris@42 1409 T1V = Ip[WS(rs, 15)];
Chris@42 1410 T1W = Im[0];
Chris@42 1411 T1X = T1V - T1W;
Chris@42 1412 T60 = T1V + T1W;
Chris@42 1413 TO = Rp[WS(rs, 7)];
Chris@42 1414 TP = Rm[WS(rs, 8)];
Chris@42 1415 TQ = TO + TP;
Chris@42 1416 T5Z = TO - TP;
Chris@42 1417 }
Chris@42 1418 T1Y = Ip[WS(rs, 7)];
Chris@42 1419 T1Z = Im[WS(rs, 8)];
Chris@42 1420 T20 = T1Y - T1Z;
Chris@42 1421 T5P = T1Y + T1Z;
Chris@42 1422 {
Chris@42 1423 E TV, TW, T5U, T25, T26, T5V;
Chris@42 1424 TV = Rm[WS(rs, 4)];
Chris@42 1425 TW = Rp[WS(rs, 11)];
Chris@42 1426 T5U = TV - TW;
Chris@42 1427 T25 = Ip[WS(rs, 11)];
Chris@42 1428 T26 = Im[WS(rs, 4)];
Chris@42 1429 T5V = T25 + T26;
Chris@42 1430 TX = TV + TW;
Chris@42 1431 T63 = T5U + T5V;
Chris@42 1432 T27 = T25 - T26;
Chris@42 1433 T5W = T5U - T5V;
Chris@42 1434 }
Chris@42 1435 {
Chris@42 1436 E TS, TT, T5R, T22, T23, T5S;
Chris@42 1437 TS = Rp[WS(rs, 3)];
Chris@42 1438 TT = Rm[WS(rs, 12)];
Chris@42 1439 T5R = TS - TT;
Chris@42 1440 T22 = Ip[WS(rs, 3)];
Chris@42 1441 T23 = Im[WS(rs, 12)];
Chris@42 1442 T5S = T22 + T23;
Chris@42 1443 TU = TS + TT;
Chris@42 1444 T62 = T5R + T5S;
Chris@42 1445 T24 = T22 - T23;
Chris@42 1446 T5T = T5R - T5S;
Chris@42 1447 }
Chris@42 1448 }
Chris@42 1449 {
Chris@42 1450 E TR, TY, T7m, T7n;
Chris@42 1451 TR = TN + TQ;
Chris@42 1452 TY = TU + TX;
Chris@42 1453 TZ = TR + TY;
Chris@42 1454 T1U = TR - TY;
Chris@42 1455 T7m = KP707106781 * (T5T - T5W);
Chris@42 1456 T7n = T5Z + T60;
Chris@42 1457 T7o = T7m - T7n;
Chris@42 1458 T89 = T7n + T7m;
Chris@42 1459 }
Chris@42 1460 {
Chris@42 1461 E T7p, T7q, T21, T28;
Chris@42 1462 T7p = T5O + T5P;
Chris@42 1463 T7q = KP707106781 * (T62 + T63);
Chris@42 1464 T7r = T7p - T7q;
Chris@42 1465 T88 = T7p + T7q;
Chris@42 1466 T21 = T1X + T20;
Chris@42 1467 T28 = T24 + T27;
Chris@42 1468 T29 = T21 - T28;
Chris@42 1469 T36 = T21 + T28;
Chris@42 1470 }
Chris@42 1471 {
Chris@42 1472 E T3F, T3G, T5Q, T5X;
Chris@42 1473 T3F = T1X - T20;
Chris@42 1474 T3G = TU - TX;
Chris@42 1475 T3H = T3F - T3G;
Chris@42 1476 T4z = T3G + T3F;
Chris@42 1477 T5Q = T5O - T5P;
Chris@42 1478 T5X = KP707106781 * (T5T + T5W);
Chris@42 1479 T5Y = T5Q - T5X;
Chris@42 1480 T6M = T5Q + T5X;
Chris@42 1481 }
Chris@42 1482 {
Chris@42 1483 E T61, T64, T3I, T3J;
Chris@42 1484 T61 = T5Z - T60;
Chris@42 1485 T64 = KP707106781 * (T62 - T63);
Chris@42 1486 T65 = T61 - T64;
Chris@42 1487 T6N = T61 + T64;
Chris@42 1488 T3I = TN - TQ;
Chris@42 1489 T3J = T27 - T24;
Chris@42 1490 T3K = T3I - T3J;
Chris@42 1491 T4A = T3I + T3J;
Chris@42 1492 }
Chris@42 1493 }
/*
 * Store phase starts here; no input element is read after this point.
 * Rp[0] and Rm[0] are written as plain sums. Every other output pair is
 * written as an FNMS/FMA pair that scales two temporaries by one of the
 * multiplier pairs prepared in the twiddle setup.
 * This scope: Rp/Rm at offsets 0 and 8.
 */
Chris@42 1494 {
Chris@42 1495 E Tv, T10, T30, T34, T37, T38;
Chris@42 1496 Tv = Tf + Tu;
Chris@42 1497 T10 = TK + TZ;
Chris@42 1498 T30 = Tv - T10;
Chris@42 1499 T34 = T32 + T33;
Chris@42 1500 T37 = T35 + T36;
Chris@42 1501 T38 = T34 - T37;
Chris@42 1502 Rp[0] = Tv + T10;
Chris@42 1503 Rm[0] = T34 + T37;
Chris@42 1504 Rp[WS(rs, 8)] = FNMS(T31, T38, T2Z * T30);
Chris@42 1505 Rm[WS(rs, 8)] = FMA(T31, T30, T2Z * T38);
Chris@42 1506 }
/* Rp/Rm at offsets 12 and 4. */
Chris@42 1507 {
Chris@42 1508 E T3e, T3o, T3k, T3s;
Chris@42 1509 {
Chris@42 1510 E T3c, T3d, T3i, T3j;
Chris@42 1511 T3c = Tf - Tu;
Chris@42 1512 T3d = T36 - T35;
Chris@42 1513 T3e = T3c - T3d;
Chris@42 1514 T3o = T3c + T3d;
Chris@42 1515 T3i = T32 - T33;
Chris@42 1516 T3j = TK - TZ;
Chris@42 1517 T3k = T3i - T3j;
Chris@42 1518 T3s = T3j + T3i;
Chris@42 1519 }
Chris@42 1520 Rp[WS(rs, 12)] = FNMS(T3h, T3k, T3b * T3e);
Chris@42 1521 Rm[WS(rs, 12)] = FMA(T3b, T3k, T3h * T3e);
Chris@42 1522 Rp[WS(rs, 4)] = FNMS(T3r, T3s, T3n * T3o);
Chris@42 1523 Rm[WS(rs, 4)] = FMA(T3n, T3s, T3r * T3o);
Chris@42 1524 }
/* Rp/Rm at offsets 10, 6, 2 and 14. */
Chris@42 1525 {
Chris@42 1526 E T1C, T2u, T2M, T2G, T2x, T2H, T2b, T2N;
Chris@42 1527 T1C = T1m + T1B;
Chris@42 1528 T2u = T2e + T2t;
Chris@42 1529 T2M = T2t - T2e;
Chris@42 1530 T2G = T1m - T1B;
Chris@42 1531 {
Chris@42 1532 E T2v, T2w, T1T, T2a;
Chris@42 1533 T2v = T1D + T1S;
Chris@42 1534 T2w = T29 - T1U;
Chris@42 1535 T2x = KP707106781 * (T2v + T2w);
Chris@42 1536 T2H = KP707106781 * (T2w - T2v);
Chris@42 1537 T1T = T1D - T1S;
Chris@42 1538 T2a = T1U + T29;
Chris@42 1539 T2b = KP707106781 * (T1T + T2a);
Chris@42 1540 T2N = KP707106781 * (T1T - T2a);
Chris@42 1541 }
Chris@42 1542 {
Chris@42 1543 E T2c, T2y, T2S, T2W;
Chris@42 1544 T2c = T1C - T2b;
Chris@42 1545 T2y = T2u - T2x;
Chris@42 1546 Rp[WS(rs, 10)] = FNMS(T2d, T2y, T1l * T2c);
Chris@42 1547 Rm[WS(rs, 10)] = FMA(T2d, T2c, T1l * T2y);
Chris@42 1548 T2S = T2G + T2H;
Chris@42 1549 T2W = T2M + T2N;
Chris@42 1550 Rp[WS(rs, 6)] = FNMS(T2V, T2W, T2R * T2S);
Chris@42 1551 Rm[WS(rs, 6)] = FMA(T2R, T2W, T2V * T2S);
Chris@42 1552 }
Chris@42 1553 {
Chris@42 1554 E T2A, T2C, T2I, T2O;
Chris@42 1555 T2A = T1C + T2b;
Chris@42 1556 T2C = T2u + T2x;
Chris@42 1557 Rp[WS(rs, 2)] = FNMS(T2B, T2C, T2z * T2A);
Chris@42 1558 Rm[WS(rs, 2)] = FMA(T2B, T2A, T2z * T2C);
Chris@42 1559 T2I = T2G - T2H;
Chris@42 1560 T2O = T2M - T2N;
Chris@42 1561 Rp[WS(rs, 14)] = FNMS(T2L, T2O, T2F * T2I);
Chris@42 1562 Rm[WS(rs, 14)] = FMA(T2F, T2O, T2L * T2I);
Chris@42 1563 }
Chris@42 1564 }
/* Rp/Rm at offsets 13, 1, 5 and 9. */
Chris@42 1565 {
Chris@42 1566 E T4y, T4U, T4K, T4Y, T4F, T4Z, T4N, T4V, T4x, T4J;
Chris@42 1567 T4x = KP707106781 * (T3Z + T40);
Chris@42 1568 T4y = T4w - T4x;
Chris@42 1569 T4U = T4w + T4x;
Chris@42 1570 T4J = KP707106781 * (T3C + T3z);
Chris@42 1571 T4K = T4I - T4J;
Chris@42 1572 T4Y = T4I + T4J;
Chris@42 1573 {
Chris@42 1574 E T4B, T4E, T4L, T4M;
Chris@42 1575 T4B = FNMS(KP382683432, T4A, KP923879532 * T4z);
Chris@42 1576 T4E = FMA(KP923879532, T4C, KP382683432 * T4D);
Chris@42 1577 T4F = T4B - T4E;
Chris@42 1578 T4Z = T4E + T4B;
Chris@42 1579 T4L = FNMS(KP382683432, T4C, KP923879532 * T4D);
Chris@42 1580 T4M = FMA(KP382683432, T4z, KP923879532 * T4A);
Chris@42 1581 T4N = T4L - T4M;
Chris@42 1582 T4V = T4L + T4M;
Chris@42 1583 }
Chris@42 1584 {
Chris@42 1585 E T4G, T4O, T51, T52;
Chris@42 1586 T4G = T4y - T4F;
Chris@42 1587 T4O = T4K - T4N;
Chris@42 1588 Rp[WS(rs, 13)] = FNMS(T4H, T4O, T4v * T4G);
Chris@42 1589 Rm[WS(rs, 13)] = FMA(T4H, T4G, T4v * T4O);
Chris@42 1590 T51 = T4U + T4V;
Chris@42 1591 T52 = T4Y + T4Z;
Chris@42 1592 Rp[WS(rs, 1)] = FNMS(T1c, T52, T17 * T51);
Chris@42 1593 Rm[WS(rs, 1)] = FMA(T17, T52, T1c * T51);
Chris@42 1594 }
Chris@42 1595 {
Chris@42 1596 E T4Q, T4S, T4W, T50;
Chris@42 1597 T4Q = T4y + T4F;
Chris@42 1598 T4S = T4K + T4N;
Chris@42 1599 Rp[WS(rs, 5)] = FNMS(T4R, T4S, T4P * T4Q);
Chris@42 1600 Rm[WS(rs, 5)] = FMA(T4R, T4Q, T4P * T4S);
Chris@42 1601 T4W = T4U - T4V;
Chris@42 1602 T50 = T4Y - T4Z;
Chris@42 1603 Rp[WS(rs, 9)] = FNMS(T4X, T50, T4T * T4W);
Chris@42 1604 Rm[WS(rs, 9)] = FMA(T4T, T50, T4X * T4W);
Chris@42 1605 }
Chris@42 1606 }
/* Rp/Rm at offsets 15, 3, 7 and 11. */
Chris@42 1607 {
Chris@42 1608 E T3E, T4k, T42, T4o, T3T, T4p, T45, T4l, T3D, T41;
Chris@42 1609 T3D = KP707106781 * (T3z - T3C);
Chris@42 1610 T3E = T3w - T3D;
Chris@42 1611 T4k = T3w + T3D;
Chris@42 1612 T41 = KP707106781 * (T3Z - T40);
Chris@42 1613 T42 = T3Y - T41;
Chris@42 1614 T4o = T3Y + T41;
Chris@42 1615 {
Chris@42 1616 E T3L, T3S, T43, T44;
Chris@42 1617 T3L = FNMS(KP923879532, T3K, KP382683432 * T3H);
Chris@42 1618 T3S = FMA(KP382683432, T3O, KP923879532 * T3R);
Chris@42 1619 T3T = T3L - T3S;
Chris@42 1620 T4p = T3S + T3L;
Chris@42 1621 T43 = FNMS(KP923879532, T3O, KP382683432 * T3R);
Chris@42 1622 T44 = FMA(KP923879532, T3H, KP382683432 * T3K);
Chris@42 1623 T45 = T43 - T44;
Chris@42 1624 T4l = T43 + T44;
Chris@42 1625 }
Chris@42 1626 {
Chris@42 1627 E T3U, T46, T4s, T4u;
Chris@42 1628 T3U = T3E - T3T;
Chris@42 1629 T46 = T42 - T45;
Chris@42 1630 Rp[WS(rs, 15)] = FNMS(T3V, T46, T3t * T3U);
Chris@42 1631 Rm[WS(rs, 15)] = FMA(T3V, T3U, T3t * T46);
Chris@42 1632 T4s = T4k + T4l;
Chris@42 1633 T4u = T4o + T4p;
Chris@42 1634 Rp[WS(rs, 3)] = FNMS(T4t, T4u, T4r * T4s);
Chris@42 1635 Rm[WS(rs, 3)] = FMA(T4r, T4u, T4t * T4s);
Chris@42 1636 }
Chris@42 1637 {
Chris@42 1638 E T4e, T4g, T4m, T4q;
Chris@42 1639 T4e = T3E + T3T;
Chris@42 1640 T4g = T42 + T45;
Chris@42 1641 Rp[WS(rs, 7)] = FNMS(T4f, T4g, T4d * T4e);
Chris@42 1642 Rm[WS(rs, 7)] = FMA(T4f, T4e, T4d * T4g);
Chris@42 1643 T4m = T4k - T4l;
Chris@42 1644 T4q = T4o - T4p;
Chris@42 1645 Rp[WS(rs, 11)] = FNMS(T4n, T4q, T4j * T4m);
Chris@42 1646 Rm[WS(rs, 11)] = FMA(T4j, T4q, T4n * T4m);
Chris@42 1647 }
Chris@42 1648 }
/* Ip/Im at offsets 8, 4, 0 and 12. */
Chris@42 1649 {
Chris@42 1650 E T6I, T72, T6X, T73, T6P, T77, T6U, T76;
Chris@42 1651 {
Chris@42 1652 E T6G, T6H, T6V, T6W;
Chris@42 1653 T6G = T56 + T5d;
Chris@42 1654 T6H = T6h + T6i;
Chris@42 1655 T6I = T6G + T6H;
Chris@42 1656 T72 = T6G - T6H;
Chris@42 1657 T6V = FMA(KP195090322, T6J, KP980785280 * T6K);
Chris@42 1658 T6W = FNMS(KP195090322, T6M, KP980785280 * T6N);
Chris@42 1659 T6X = T6V + T6W;
Chris@42 1660 T73 = T6W - T6V;
Chris@42 1661 }
Chris@42 1662 {
Chris@42 1663 E T6L, T6O, T6S, T6T;
Chris@42 1664 T6L = FNMS(KP195090322, T6K, KP980785280 * T6J);
Chris@42 1665 T6O = FMA(KP980785280, T6M, KP195090322 * T6N);
Chris@42 1666 T6P = T6L + T6O;
Chris@42 1667 T77 = T6L - T6O;
Chris@42 1668 T6S = T6c + T6f;
Chris@42 1669 T6T = T5s + T5l;
Chris@42 1670 T6U = T6S + T6T;
Chris@42 1671 T76 = T6S - T6T;
Chris@42 1672 }
Chris@42 1673 {
Chris@42 1674 E T6Q, T6Y, T79, T7a;
Chris@42 1675 T6Q = T6I - T6P;
Chris@42 1676 T6Y = T6U - T6X;
Chris@42 1677 Ip[WS(rs, 8)] = FNMS(T6R, T6Y, T6F * T6Q);
Chris@42 1678 Im[WS(rs, 8)] = FMA(T6R, T6Q, T6F * T6Y);
Chris@42 1679 T79 = T72 + T73;
Chris@42 1680 T7a = T76 + T77;
Chris@42 1681 Ip[WS(rs, 4)] = FNMS(T1d, T7a, T18 * T79);
Chris@42 1682 Im[WS(rs, 4)] = FMA(T18, T7a, T1d * T79);
Chris@42 1683 }
Chris@42 1684 {
Chris@42 1685 E T6Z, T70, T74, T78;
Chris@42 1686 T6Z = T6I + T6P;
Chris@42 1687 T70 = T6U + T6X;
Chris@42 1688 Ip[0] = FNMS(T14, T70, T11 * T6Z);
Chris@42 1689 Im[0] = FMA(T14, T6Z, T11 * T70);
Chris@42 1690 T74 = T72 - T73;
Chris@42 1691 T78 = T76 - T77;
Chris@42 1692 Ip[WS(rs, 12)] = FNMS(T75, T78, T71 * T74);
Chris@42 1693 Im[WS(rs, 12)] = FMA(T71, T78, T75 * T74);
Chris@42 1694 }
Chris@42 1695 }
/* Ip/Im at offsets 11, 15, 3 and 7. */
Chris@42 1696 {
Chris@42 1697 E T84, T8q, T8l, T8r, T8b, T8v, T8i, T8u;
Chris@42 1698 {
Chris@42 1699 E T82, T83, T8j, T8k;
Chris@42 1700 T82 = T7b + T7c;
Chris@42 1701 T83 = T7F + T7G;
Chris@42 1702 T84 = T82 - T83;
Chris@42 1703 T8q = T82 + T83;
Chris@42 1704 T8j = FMA(KP195090322, T86, KP980785280 * T85);
Chris@42 1705 T8k = FMA(KP195090322, T89, KP980785280 * T88);
Chris@42 1706 T8l = T8j - T8k;
Chris@42 1707 T8r = T8j + T8k;
Chris@42 1708 }
Chris@42 1709 {
Chris@42 1710 E T87, T8a, T8g, T8h;
Chris@42 1711 T87 = FNMS(KP980785280, T86, KP195090322 * T85);
Chris@42 1712 T8a = FNMS(KP980785280, T89, KP195090322 * T88);
Chris@42 1713 T8b = T87 + T8a;
Chris@42 1714 T8v = T87 - T8a;
Chris@42 1715 T8g = T7C - T7D;
Chris@42 1716 T8h = T7g - T7j;
Chris@42 1717 T8i = T8g + T8h;
Chris@42 1718 T8u = T8g - T8h;
Chris@42 1719 }
Chris@42 1720 {
Chris@42 1721 E T8c, T8m, T8y, T8A;
Chris@42 1722 T8c = T84 - T8b;
Chris@42 1723 T8m = T8i - T8l;
Chris@42 1724 Ip[WS(rs, 11)] = FNMS(T8f, T8m, T81 * T8c);
Chris@42 1725 Im[WS(rs, 11)] = FMA(T8f, T8c, T81 * T8m);
Chris@42 1726 T8y = T8q + T8r;
Chris@42 1727 T8A = T8u - T8v;
Chris@42 1728 Ip[WS(rs, 15)] = FNMS(T8z, T8A, T8x * T8y);
Chris@42 1729 Im[WS(rs, 15)] = FMA(T8x, T8A, T8z * T8y);
Chris@42 1730 }
Chris@42 1731 {
Chris@42 1732 E T8n, T8o, T8s, T8w;
Chris@42 1733 T8n = T84 + T8b;
Chris@42 1734 T8o = T8i + T8l;
Chris@42 1735 Ip[WS(rs, 3)] = FNMS(T1j, T8o, T1f * T8n);
Chris@42 1736 Im[WS(rs, 3)] = FMA(T1j, T8n, T1f * T8o);
Chris@42 1737 T8s = T8q - T8r;
Chris@42 1738 T8w = T8u + T8v;
Chris@42 1739 Ip[WS(rs, 7)] = FNMS(T8t, T8w, T8p * T8s);
Chris@42 1740 Im[WS(rs, 7)] = FMA(T8p, T8w, T8t * T8s);
Chris@42 1741 }
Chris@42 1742 }
/* Ip/Im at offsets 10, 6, 2 and 14. */
Chris@42 1743 {
Chris@42 1744 E T5u, T6u, T6n, T6v, T67, T6B, T6k, T6A;
Chris@42 1745 {
Chris@42 1746 E T5e, T5t, T6l, T6m;
Chris@42 1747 T5e = T56 - T5d;
Chris@42 1748 T5t = T5l - T5s;
Chris@42 1749 T5u = T5e + T5t;
Chris@42 1750 T6u = T5e - T5t;
Chris@42 1751 T6l = FMA(KP831469612, T5F, KP555570233 * T5M);
Chris@42 1752 T6m = FNMS(KP831469612, T5Y, KP555570233 * T65);
Chris@42 1753 T6n = T6l + T6m;
Chris@42 1754 T6v = T6m - T6l;
Chris@42 1755 }
Chris@42 1756 {
Chris@42 1757 E T5N, T66, T6g, T6j;
Chris@42 1758 T5N = FNMS(KP831469612, T5M, KP555570233 * T5F);
Chris@42 1759 T66 = FMA(KP555570233, T5Y, KP831469612 * T65);
Chris@42 1760 T67 = T5N + T66;
Chris@42 1761 T6B = T5N - T66;
Chris@42 1762 T6g = T6c - T6f;
Chris@42 1763 T6j = T6h - T6i;
Chris@42 1764 T6k = T6g + T6j;
Chris@42 1765 T6A = T6g - T6j;
Chris@42 1766 }
Chris@42 1767 {
Chris@42 1768 E T68, T6o, T6D, T6E;
Chris@42 1769 T68 = T5u - T67;
Chris@42 1770 T6o = T6k - T6n;
Chris@42 1771 Ip[WS(rs, 10)] = FNMS(T69, T6o, T53 * T68);
Chris@42 1772 Im[WS(rs, 10)] = FMA(T69, T68, T53 * T6o);
Chris@42 1773 T6D = T6u + T6v;
Chris@42 1774 T6E = T6A + T6B;
Chris@42 1775 Ip[WS(rs, 6)] = FNMS(T4c, T6E, T49 * T6D);
Chris@42 1776 Im[WS(rs, 6)] = FMA(T49, T6E, T4c * T6D);
Chris@42 1777 }
Chris@42 1778 {
Chris@42 1779 E T6p, T6q, T6w, T6C;
Chris@42 1780 T6p = T5u + T67;
Chris@42 1781 T6q = T6k + T6n;
Chris@42 1782 Ip[WS(rs, 2)] = FNMS(T4i, T6q, T4h * T6p);
Chris@42 1783 Im[WS(rs, 2)] = FMA(T4i, T6p, T4h * T6q);
Chris@42 1784 T6w = T6u - T6v;
Chris@42 1785 T6C = T6A - T6B;
Chris@42 1786 Ip[WS(rs, 14)] = FNMS(T6z, T6C, T6t * T6w);
Chris@42 1787 Im[WS(rs, 14)] = FMA(T6t, T6C, T6z * T6w);
Chris@42 1788 }
Chris@42 1789 }
/* Ip/Im at offsets 13, 1, 5 and 9. */
Chris@42 1790 {
Chris@42 1791 E T7l, T7Q, T7L, T7R, T7A, T7V, T7I, T7U;
Chris@42 1792 {
Chris@42 1793 E T7d, T7k, T7J, T7K;
Chris@42 1794 T7d = T7b - T7c;
Chris@42 1795 T7k = T7g + T7j;
Chris@42 1796 T7l = T7d - T7k;
Chris@42 1797 T7Q = T7d + T7k;
Chris@42 1798 T7J = FNMS(KP555570233, T7v, KP831469612 * T7y);
Chris@42 1799 T7K = FMA(KP555570233, T7o, KP831469612 * T7r);
Chris@42 1800 T7L = T7J - T7K;
Chris@42 1801 T7R = T7J + T7K;
Chris@42 1802 }
Chris@42 1803 {
Chris@42 1804 E T7s, T7z, T7E, T7H;
Chris@42 1805 T7s = FNMS(KP555570233, T7r, KP831469612 * T7o);
Chris@42 1806 T7z = FMA(KP831469612, T7v, KP555570233 * T7y);
Chris@42 1807 T7A = T7s - T7z;
Chris@42 1808 T7V = T7z + T7s;
Chris@42 1809 T7E = T7C + T7D;
Chris@42 1810 T7H = T7F - T7G;
Chris@42 1811 T7I = T7E - T7H;
Chris@42 1812 T7U = T7E + T7H;
Chris@42 1813 }
Chris@42 1814 {
Chris@42 1815 E T7B, T7M, T7X, T7Y;
Chris@42 1816 T7B = T7l - T7A;
Chris@42 1817 T7M = T7I - T7L;
Chris@42 1818 Ip[WS(rs, 13)] = FNMS(T1k, T7M, T1g * T7B);
Chris@42 1819 Im[WS(rs, 13)] = FMA(T1k, T7B, T1g * T7M);
Chris@42 1820 T7X = T7Q + T7R;
Chris@42 1821 T7Y = T7U + T7V;
Chris@42 1822 Ip[WS(rs, 1)] = FNMS(T15, T7Y, T12 * T7X);
Chris@42 1823 Im[WS(rs, 1)] = FMA(T12, T7Y, T15 * T7X);
Chris@42 1824 }
Chris@42 1825 {
Chris@42 1826 E T7N, T7O, T7S, T7W;
Chris@42 1827 T7N = T7l + T7A;
Chris@42 1828 T7O = T7I + T7L;
Chris@42 1829 Ip[WS(rs, 5)] = FNMS(T2Y, T7O, T2X * T7N);
Chris@42 1830 Im[WS(rs, 5)] = FMA(T2Y, T7N, T2X * T7O);
Chris@42 1831 T7S = T7Q - T7R;
Chris@42 1832 T7W = T7U - T7V;
Chris@42 1833 Ip[WS(rs, 9)] = FNMS(T7T, T7W, T7P * T7S);
Chris@42 1834 Im[WS(rs, 9)] = FMA(T7P, T7W, T7T * T7S);
Chris@42 1835 }
Chris@42 1836 }
Chris@42 1837 }
Chris@42 1838 }
Chris@42 1839 }
Chris@42 1840 }
Chris@42 1841
/*
 * Twiddle instruction table for the non-FMA build of this codelet.
 * Four TW_CEXP entries whose last field is 1, 3, 9 and 27, followed by a
 * TW_NEXT terminator. The kernel above consumes eight W values per loop
 * iteration (W[0..7], with W advanced by 8).
 * NOTE(review): that is presumably one value pair per TW_CEXP entry, with
 * 1, 3, 9, 27 being the exponents chosen by -twiddle-log3; confirm against
 * the tw_instr definition.
 */
Chris@42 1842 static const tw_instr twinstr[] = {
Chris@42 1843 {TW_CEXP, 1, 1},
Chris@42 1844 {TW_CEXP, 1, 3},
Chris@42 1845 {TW_CEXP, 1, 9},
Chris@42 1846 {TW_CEXP, 1, 27},
Chris@42 1847 {TW_NEXT, 1, 0}
Chris@42 1848 };
Chris@42 1849
/*
 * Codelet descriptor for the non-FMA build: 32, the name string
 * "hc2cb2_32", the twiddle table above, &GENUS, and the four counts
 * {376, 168, 112, 0}. The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in this variant's
 * generated header comment.
 */
Chris@42 1850 static const hc2c_desc desc = { 32, "hc2cb2_32", twinstr, &GENUS, {376, 168, 112, 0} };
Chris@42 1851
/*
 * Registration entry point (non-FMA build): hands the kernel function
 * hc2cb2_32 and its descriptor to X(khc2c_register) on planner p, tagged
 * with HC2C_VIA_RDFT.
 */
Chris@42 1852 void X(codelet_hc2cb2_32) (planner *p) {
Chris@42 1853 X(khc2c_register) (p, hc2cb2_32, &desc, HC2C_VIA_RDFT);
Chris@42 1854 }
Chris@42 1855 #endif /* HAVE_FMA */