annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb2_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:38 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hb2_32 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 488 FP additions, 350 FP multiplications,
Chris@82 32 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
Chris@82 33 * 164 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb2_32 -- machine-generated codelet (see the genfft command line in the
 * comment above: gen_hc2hc, -n 32, -dif, -sign 1, -twiddle-log3).
 *
 * For every m in [mb, me) the loop body:
 *   - reads the eight values W[0..7],
 *   - reads the 32 elements cr[WS(rs, 0..31)] and the 32 elements
 *     ci[WS(rs, 0..31)],
 *   - combines them with add/sub and FMA/FNMS butterflies, and
 *   - stores 64 results back to the same cr[]/ci[] locations (in place).
 * After each iteration cr advances by ms, ci moves back by ms, and W
 * advances by 8; W is offset by (mb - 1) * 8 before the first iteration.
 *
 * Parameters:
 *   cr, ci  data arrays, read and overwritten in place
 *   W       multiplier table, 8 values consumed per iteration (read-only)
 *   rs      stride argument passed to WS() for element indexing
 *   mb, me  half-open range of the loop counter m
 *   ms      per-iteration pointer step (added to cr, subtracted from ci)
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * are not defined in this file; presumably they come from the included
 * project headers -- confirm there. The comments below assume
 * FMA(a, b, c) == a * b + c and FNMS(a, b, c) == c - a * b.
 */
Chris@82 37 static void hb2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants. The literals equal, to the digits shown, cos(pi/16),
 * tan(pi/16), cos(3pi/16), tan(3pi/16), cos(pi/8), tan(pi/8) and sqrt(1/2),
 * in that order. */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 45 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 46 {
Chris@82 47 INT m;
/* One pass per m: step cr forward and ci backward by ms, W forward by 8. */
Chris@82 48 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 49 E T11, T14, T12, T37, T17, T1b, T39, T3a, T3v, T3d, T3x, T15, T16, T5X, T4p;
Chris@82 50 E T3G, T78, T7e, T8S, T9s, T8P, T8V, T98, T9m, T7I, T7C, T3y, T4b, T3C, T4g;
Chris@82 51 E T5u, T6b, T5I, T6e, T1a, T1c, T2O, T4r, T4s, T4W, T3J, T3K, T3Y, T5Z, T60;
Chris@82 52 E T66, T3i, T3q, T3l, T3e, T7S, T8K, T8m, T8E, T5k, T5U, T5R, T5e, T6i, T7s;
Chris@82 53 E T6O, T7o;
/* Multiplier setup: only W[0..7] are loaded (T37/T39, T11/T14, T12/T15,
 * T17/T1b). Every other multiplier pair used in the output stages below is
 * built here from sums and differences of products of those eight values. */
Chris@82 54 {
Chris@82 55 E T77, T9l, T7B, T7d, T9r, T7H, T3b, T5d, T19, T3I;
Chris@82 56 {
Chris@82 57 E T13, T3F, T38, T3c;
Chris@82 58 T11 = W[2];
Chris@82 59 T14 = W[3];
Chris@82 60 T12 = W[4];
Chris@82 61 T37 = W[0];
Chris@82 62 T13 = T11 * T12;
Chris@82 63 T3F = T37 * T12;
Chris@82 64 T38 = T37 * T11;
Chris@82 65 T3c = T37 * T14;
Chris@82 66 T17 = W[6];
Chris@82 67 T77 = T37 * T17;
Chris@82 68 T9l = T12 * T17;
Chris@82 69 T7B = T11 * T17;
Chris@82 70 T1b = W[7];
Chris@82 71 T7d = T37 * T1b;
Chris@82 72 T9r = T12 * T1b;
Chris@82 73 T7H = T11 * T1b;
Chris@82 74 T39 = W[1];
Chris@82 75 T3a = FNMS(T39, T14, T38);
Chris@82 76 T3v = FMA(T39, T14, T38);
Chris@82 77 T3b = T3a * T12;
Chris@82 78 T5d = T3v * T12;
Chris@82 79 T3d = FMA(T39, T11, T3c);
Chris@82 80 T3x = FNMS(T39, T11, T3c);
Chris@82 81 T15 = W[5];
Chris@82 82 T19 = T11 * T15;
Chris@82 83 T3I = T37 * T15;
Chris@82 84 T16 = FMA(T14, T15, T13);
Chris@82 85 T5X = FNMS(T14, T15, T13);
Chris@82 86 T4p = FMA(T39, T15, T3F);
Chris@82 87 T3G = FNMS(T39, T15, T3F);
Chris@82 88 }
Chris@82 89 T78 = FNMS(T39, T1b, T77);
Chris@82 90 T7e = FMA(T39, T17, T7d);
Chris@82 91 T8S = FMA(T14, T17, T7H);
Chris@82 92 T9s = FNMS(T15, T17, T9r);
Chris@82 93 T8P = FNMS(T14, T1b, T7B);
Chris@82 94 T8V = FMA(T39, T1b, T77);
Chris@82 95 T98 = FNMS(T39, T17, T7d);
Chris@82 96 T9m = FMA(T15, T1b, T9l);
Chris@82 97 T7I = FNMS(T14, T17, T7H);
Chris@82 98 T7C = FMA(T14, T1b, T7B);
Chris@82 99 {
Chris@82 100 E T3w, T3B, T5Y, T65;
Chris@82 101 T3w = T3v * T17;
Chris@82 102 T3y = FNMS(T3x, T1b, T3w);
Chris@82 103 T4b = FMA(T3x, T1b, T3w);
Chris@82 104 T3B = T3v * T1b;
Chris@82 105 T3C = FMA(T3x, T17, T3B);
Chris@82 106 T4g = FNMS(T3x, T17, T3B);
Chris@82 107 {
Chris@82 108 E T5t, T5H, T18, T2N;
Chris@82 109 T5t = T3a * T17;
Chris@82 110 T5u = FMA(T3d, T1b, T5t);
Chris@82 111 T6b = FNMS(T3d, T1b, T5t);
Chris@82 112 T5H = T3a * T1b;
Chris@82 113 T5I = FNMS(T3d, T17, T5H);
Chris@82 114 T6e = FMA(T3d, T17, T5H);
Chris@82 115 T18 = T16 * T17;
Chris@82 116 T2N = T16 * T1b;
Chris@82 117 T1a = FNMS(T14, T12, T19);
Chris@82 118 T1c = FMA(T1a, T1b, T18);
Chris@82 119 T2O = FNMS(T1a, T17, T2N);
Chris@82 120 }
Chris@82 121 {
Chris@82 122 E T4q, T4V, T3H, T3X;
Chris@82 123 T4q = T4p * T17;
Chris@82 124 T4V = T4p * T1b;
Chris@82 125 T4r = FNMS(T39, T12, T3I);
Chris@82 126 T4s = FMA(T4r, T1b, T4q);
Chris@82 127 T4W = FNMS(T4r, T17, T4V);
Chris@82 128 T3H = T3G * T17;
Chris@82 129 T3X = T3G * T1b;
Chris@82 130 T3J = FMA(T39, T12, T3I);
Chris@82 131 T3K = FMA(T3J, T1b, T3H);
Chris@82 132 T3Y = FNMS(T3J, T17, T3X);
Chris@82 133 }
Chris@82 134 T5Y = T5X * T17;
Chris@82 135 T65 = T5X * T1b;
Chris@82 136 T5Z = FMA(T14, T12, T19);
Chris@82 137 T60 = FMA(T5Z, T1b, T5Y);
Chris@82 138 T66 = FNMS(T5Z, T17, T65);
Chris@82 139 {
Chris@82 140 E T8D, T8J, T7R, T8l, T3h;
Chris@82 141 T3h = T3a * T15;
Chris@82 142 T3i = FNMS(T3d, T12, T3h);
Chris@82 143 T3q = FMA(T3d, T12, T3h);
Chris@82 144 T3l = FNMS(T3d, T15, T3b);
Chris@82 145 T8D = T3l * T17;
Chris@82 146 T8J = T3l * T1b;
Chris@82 147 T3e = FMA(T3d, T15, T3b);
Chris@82 148 T7R = T3e * T17;
Chris@82 149 T8l = T3e * T1b;
Chris@82 150 T7S = FMA(T3i, T1b, T7R);
Chris@82 151 T8K = FNMS(T3q, T17, T8J);
Chris@82 152 T8m = FNMS(T3i, T17, T8l);
Chris@82 153 T8E = FMA(T3q, T1b, T8D);
Chris@82 154 }
Chris@82 155 {
Chris@82 156 E T6h, T6N, T7n, T7r, T5j;
Chris@82 157 T5j = T3v * T15;
Chris@82 158 T5k = FMA(T3x, T12, T5j);
Chris@82 159 T5U = FNMS(T3x, T12, T5j);
Chris@82 160 T5R = FMA(T3x, T15, T5d);
Chris@82 161 T6h = T5R * T17;
Chris@82 162 T6N = T5R * T1b;
Chris@82 163 T5e = FNMS(T3x, T15, T5d);
Chris@82 164 T7n = T5e * T17;
Chris@82 165 T7r = T5e * T1b;
Chris@82 166 T6i = FMA(T5U, T1b, T6h);
Chris@82 167 T7s = FNMS(T5k, T17, T7r);
Chris@82 168 T6O = FNMS(T5U, T17, T6N);
Chris@82 169 T7o = FMA(T5k, T1b, T7n);
Chris@82 170 }
Chris@82 171 }
Chris@82 172 }
/* Data stage: load all 64 inputs in four groups, reduce them with add/sub
 * and constant-multiplier butterflies, then store the 64 outputs. */
Chris@82 173 {
Chris@82 174 E Tf, T6j, T7V, T8W, T8p, T99, T1t, T3L, T2X, T3Z, T4Z, T5J, T6W, T7t, T4v;
Chris@82 175 E T5v, TZ, T7x, T28, T3S, T91, T9d, T2h, T3R, T4Q, T5B, T8a, T8v, T4N, T5C;
Chris@82 176 E T6J, T6Z, TK, T7w, T2z, T3P, T94, T9c, T2I, T3O, T4J, T5y, T8h, T8u, T4G;
Chris@82 177 E T5z, T6A, T6Y, Tu, T6P, T82, T9a, T8s, T8X, T1Q, T40, T30, T3M, T52, T5w;
Chris@82 178 E T6q, T7u, T4C, T5K;
/* Input group 1: cr[] at indices 0, 4, ..., 28 and ci[] at 3, 7, ..., 31. */
Chris@82 179 {
Chris@82 180 E T3, T1d, T6, T2P, T2S, T6Q, T1g, T6R, Td, T6U, T1r, T2V, Ta, T6T, T1m;
Chris@82 181 E T2U;
Chris@82 182 {
Chris@82 183 E T1, T2, T1e, T1f;
Chris@82 184 T1 = cr[0];
Chris@82 185 T2 = ci[WS(rs, 15)];
Chris@82 186 T3 = T1 + T2;
Chris@82 187 T1d = T1 - T2;
Chris@82 188 {
Chris@82 189 E T4, T5, T2Q, T2R;
Chris@82 190 T4 = cr[WS(rs, 8)];
Chris@82 191 T5 = ci[WS(rs, 7)];
Chris@82 192 T6 = T4 + T5;
Chris@82 193 T2P = T4 - T5;
Chris@82 194 T2Q = ci[WS(rs, 31)];
Chris@82 195 T2R = cr[WS(rs, 16)];
Chris@82 196 T2S = T2Q + T2R;
Chris@82 197 T6Q = T2Q - T2R;
Chris@82 198 }
Chris@82 199 T1e = ci[WS(rs, 23)];
Chris@82 200 T1f = cr[WS(rs, 24)];
Chris@82 201 T1g = T1e + T1f;
Chris@82 202 T6R = T1e - T1f;
Chris@82 203 {
Chris@82 204 E Tb, Tc, T1n, T1o, T1p, T1q;
Chris@82 205 Tb = ci[WS(rs, 3)];
Chris@82 206 Tc = cr[WS(rs, 12)];
Chris@82 207 T1n = Tb - Tc;
Chris@82 208 T1o = ci[WS(rs, 19)];
Chris@82 209 T1p = cr[WS(rs, 28)];
Chris@82 210 T1q = T1o + T1p;
Chris@82 211 Td = Tb + Tc;
Chris@82 212 T6U = T1o - T1p;
Chris@82 213 T1r = T1n - T1q;
Chris@82 214 T2V = T1n + T1q;
Chris@82 215 }
Chris@82 216 {
Chris@82 217 E T8, T9, T1i, T1j, T1k, T1l;
Chris@82 218 T8 = cr[WS(rs, 4)];
Chris@82 219 T9 = ci[WS(rs, 11)];
Chris@82 220 T1i = T8 - T9;
Chris@82 221 T1j = ci[WS(rs, 27)];
Chris@82 222 T1k = cr[WS(rs, 20)];
Chris@82 223 T1l = T1j + T1k;
Chris@82 224 Ta = T8 + T9;
Chris@82 225 T6T = T1j - T1k;
Chris@82 226 T1m = T1i - T1l;
Chris@82 227 T2U = T1i + T1l;
Chris@82 228 }
Chris@82 229 }
Chris@82 230 {
Chris@82 231 E T7, Te, T7T, T7U;
Chris@82 232 T7 = T3 + T6;
Chris@82 233 Te = Ta + Td;
Chris@82 234 Tf = T7 + Te;
Chris@82 235 T6j = T7 - Te;
Chris@82 236 T7T = T3 - T6;
Chris@82 237 T7U = T6U - T6T;
Chris@82 238 T7V = T7T - T7U;
Chris@82 239 T8W = T7T + T7U;
Chris@82 240 }
Chris@82 241 {
Chris@82 242 E T8n, T8o, T1h, T1s;
Chris@82 243 T8n = T6Q - T6R;
Chris@82 244 T8o = Ta - Td;
Chris@82 245 T8p = T8n - T8o;
Chris@82 246 T99 = T8o + T8n;
Chris@82 247 T1h = T1d - T1g;
Chris@82 248 T1s = T1m + T1r;
Chris@82 249 T1t = FNMS(KP707106781, T1s, T1h);
Chris@82 250 T3L = FMA(KP707106781, T1s, T1h);
Chris@82 251 }
Chris@82 252 {
Chris@82 253 E T2T, T2W, T4X, T4Y;
Chris@82 254 T2T = T2P + T2S;
Chris@82 255 T2W = T2U - T2V;
Chris@82 256 T2X = FNMS(KP707106781, T2W, T2T);
Chris@82 257 T3Z = FMA(KP707106781, T2W, T2T);
Chris@82 258 T4X = T2S - T2P;
Chris@82 259 T4Y = T1m - T1r;
Chris@82 260 T4Z = FMA(KP707106781, T4Y, T4X);
Chris@82 261 T5J = FNMS(KP707106781, T4Y, T4X);
Chris@82 262 }
Chris@82 263 {
Chris@82 264 E T6S, T6V, T4t, T4u;
Chris@82 265 T6S = T6Q + T6R;
Chris@82 266 T6V = T6T + T6U;
Chris@82 267 T6W = T6S - T6V;
Chris@82 268 T7t = T6S + T6V;
Chris@82 269 T4t = T1d + T1g;
Chris@82 270 T4u = T2U + T2V;
Chris@82 271 T4v = FNMS(KP707106781, T4u, T4t);
Chris@82 272 T5v = FMA(KP707106781, T4u, T4t);
Chris@82 273 }
Chris@82 274 }
/* Input group 2: cr[] at indices 3, 7, ..., 31 and ci[] at 0, 4, ..., 28. */
Chris@82 275 {
Chris@82 276 E TR, T87, T1S, T29, T1V, T84, T2c, T6E, TY, T85, T88, T21, T26, T2f, T6H;
Chris@82 277 E T2e, T86, T89;
Chris@82 278 {
Chris@82 279 E TL, TM, TN, TO, TP, TQ;
Chris@82 280 TL = ci[0];
Chris@82 281 TM = cr[WS(rs, 15)];
Chris@82 282 TN = TL + TM;
Chris@82 283 TO = cr[WS(rs, 7)];
Chris@82 284 TP = ci[WS(rs, 8)];
Chris@82 285 TQ = TO + TP;
Chris@82 286 TR = TN + TQ;
Chris@82 287 T87 = TN - TQ;
Chris@82 288 T1S = TO - TP;
Chris@82 289 T29 = TL - TM;
Chris@82 290 }
Chris@82 291 {
Chris@82 292 E T1T, T1U, T6C, T2a, T2b, T6D;
Chris@82 293 T1T = ci[WS(rs, 16)];
Chris@82 294 T1U = cr[WS(rs, 31)];
Chris@82 295 T6C = T1T - T1U;
Chris@82 296 T2a = ci[WS(rs, 24)];
Chris@82 297 T2b = cr[WS(rs, 23)];
Chris@82 298 T6D = T2a - T2b;
Chris@82 299 T1V = T1T + T1U;
Chris@82 300 T84 = T6C - T6D;
Chris@82 301 T2c = T2a + T2b;
Chris@82 302 T6E = T6C + T6D;
Chris@82 303 }
Chris@82 304 {
Chris@82 305 E TU, T1X, T25, T6G, TX, T22, T20, T6F;
Chris@82 306 {
Chris@82 307 E TS, TT, T23, T24;
Chris@82 308 TS = cr[WS(rs, 3)];
Chris@82 309 TT = ci[WS(rs, 12)];
Chris@82 310 TU = TS + TT;
Chris@82 311 T1X = TS - TT;
Chris@82 312 T23 = ci[WS(rs, 20)];
Chris@82 313 T24 = cr[WS(rs, 27)];
Chris@82 314 T25 = T23 + T24;
Chris@82 315 T6G = T23 - T24;
Chris@82 316 }
Chris@82 317 {
Chris@82 318 E TV, TW, T1Y, T1Z;
Chris@82 319 TV = ci[WS(rs, 4)];
Chris@82 320 TW = cr[WS(rs, 11)];
Chris@82 321 TX = TV + TW;
Chris@82 322 T22 = TV - TW;
Chris@82 323 T1Y = ci[WS(rs, 28)];
Chris@82 324 T1Z = cr[WS(rs, 19)];
Chris@82 325 T20 = T1Y + T1Z;
Chris@82 326 T6F = T1Y - T1Z;
Chris@82 327 }
Chris@82 328 TY = TU + TX;
Chris@82 329 T85 = TU - TX;
Chris@82 330 T88 = T6G - T6F;
Chris@82 331 T21 = T1X + T20;
Chris@82 332 T26 = T22 + T25;
Chris@82 333 T2f = T22 - T25;
Chris@82 334 T6H = T6F + T6G;
Chris@82 335 T2e = T1X - T20;
Chris@82 336 }
Chris@82 337 TZ = TR + TY;
Chris@82 338 T7x = T6E + T6H;
Chris@82 339 {
Chris@82 340 E T1W, T27, T8Z, T90;
Chris@82 341 T1W = T1S - T1V;
Chris@82 342 T27 = T21 - T26;
Chris@82 343 T28 = FNMS(KP707106781, T27, T1W);
Chris@82 344 T3S = FMA(KP707106781, T27, T1W);
Chris@82 345 T8Z = T85 + T84;
Chris@82 346 T90 = T87 + T88;
Chris@82 347 T91 = FNMS(KP414213562, T90, T8Z);
Chris@82 348 T9d = FMA(KP414213562, T8Z, T90);
Chris@82 349 }
Chris@82 350 {
Chris@82 351 E T2d, T2g, T4O, T4P;
Chris@82 352 T2d = T29 - T2c;
Chris@82 353 T2g = T2e + T2f;
Chris@82 354 T2h = FNMS(KP707106781, T2g, T2d);
Chris@82 355 T3R = FMA(KP707106781, T2g, T2d);
Chris@82 356 T4O = T1S + T1V;
Chris@82 357 T4P = T2e - T2f;
Chris@82 358 T4Q = FNMS(KP707106781, T4P, T4O);
Chris@82 359 T5B = FMA(KP707106781, T4P, T4O);
Chris@82 360 }
Chris@82 361 T86 = T84 - T85;
Chris@82 362 T89 = T87 - T88;
Chris@82 363 T8a = FMA(KP414213562, T89, T86);
Chris@82 364 T8v = FNMS(KP414213562, T86, T89);
Chris@82 365 {
Chris@82 366 E T4L, T4M, T6B, T6I;
Chris@82 367 T4L = T29 + T2c;
Chris@82 368 T4M = T21 + T26;
Chris@82 369 T4N = FNMS(KP707106781, T4M, T4L);
Chris@82 370 T5C = FMA(KP707106781, T4M, T4L);
Chris@82 371 T6B = TR - TY;
Chris@82 372 T6I = T6E - T6H;
Chris@82 373 T6J = T6B + T6I;
Chris@82 374 T6Z = T6I - T6B;
Chris@82 375 }
Chris@82 376 }
/* Input group 3: cr[] at indices 1, 5, ..., 29 and ci[] at 2, 6, ..., 30. */
Chris@82 377 {
Chris@82 378 E TC, T8e, T2j, T2A, T2m, T8b, T2D, T6v, TJ, T8c, T8f, T2s, T2x, T2G, T6y;
Chris@82 379 E T2F, T8d, T8g;
Chris@82 380 {
Chris@82 381 E Tw, Tx, Ty, Tz, TA, TB;
Chris@82 382 Tw = cr[WS(rs, 1)];
Chris@82 383 Tx = ci[WS(rs, 14)];
Chris@82 384 Ty = Tw + Tx;
Chris@82 385 Tz = cr[WS(rs, 9)];
Chris@82 386 TA = ci[WS(rs, 6)];
Chris@82 387 TB = Tz + TA;
Chris@82 388 TC = Ty + TB;
Chris@82 389 T8e = Ty - TB;
Chris@82 390 T2j = Tz - TA;
Chris@82 391 T2A = Tw - Tx;
Chris@82 392 }
Chris@82 393 {
Chris@82 394 E T2k, T2l, T6t, T2B, T2C, T6u;
Chris@82 395 T2k = ci[WS(rs, 30)];
Chris@82 396 T2l = cr[WS(rs, 17)];
Chris@82 397 T6t = T2k - T2l;
Chris@82 398 T2B = ci[WS(rs, 22)];
Chris@82 399 T2C = cr[WS(rs, 25)];
Chris@82 400 T6u = T2B - T2C;
Chris@82 401 T2m = T2k + T2l;
Chris@82 402 T8b = T6t - T6u;
Chris@82 403 T2D = T2B + T2C;
Chris@82 404 T6v = T6t + T6u;
Chris@82 405 }
Chris@82 406 {
Chris@82 407 E TF, T2o, T2w, T6x, TI, T2t, T2r, T6w;
Chris@82 408 {
Chris@82 409 E TD, TE, T2u, T2v;
Chris@82 410 TD = cr[WS(rs, 5)];
Chris@82 411 TE = ci[WS(rs, 10)];
Chris@82 412 TF = TD + TE;
Chris@82 413 T2o = TD - TE;
Chris@82 414 T2u = ci[WS(rs, 18)];
Chris@82 415 T2v = cr[WS(rs, 29)];
Chris@82 416 T2w = T2u + T2v;
Chris@82 417 T6x = T2u - T2v;
Chris@82 418 }
Chris@82 419 {
Chris@82 420 E TG, TH, T2p, T2q;
Chris@82 421 TG = ci[WS(rs, 2)];
Chris@82 422 TH = cr[WS(rs, 13)];
Chris@82 423 TI = TG + TH;
Chris@82 424 T2t = TG - TH;
Chris@82 425 T2p = ci[WS(rs, 26)];
Chris@82 426 T2q = cr[WS(rs, 21)];
Chris@82 427 T2r = T2p + T2q;
Chris@82 428 T6w = T2p - T2q;
Chris@82 429 }
Chris@82 430 TJ = TF + TI;
Chris@82 431 T8c = TF - TI;
Chris@82 432 T8f = T6x - T6w;
Chris@82 433 T2s = T2o + T2r;
Chris@82 434 T2x = T2t + T2w;
Chris@82 435 T2G = T2t - T2w;
Chris@82 436 T6y = T6w + T6x;
Chris@82 437 T2F = T2o - T2r;
Chris@82 438 }
Chris@82 439 TK = TC + TJ;
Chris@82 440 T7w = T6v + T6y;
Chris@82 441 {
Chris@82 442 E T2n, T2y, T92, T93;
Chris@82 443 T2n = T2j + T2m;
Chris@82 444 T2y = T2s - T2x;
Chris@82 445 T2z = FNMS(KP707106781, T2y, T2n);
Chris@82 446 T3P = FMA(KP707106781, T2y, T2n);
Chris@82 447 T92 = T8c + T8b;
Chris@82 448 T93 = T8e + T8f;
Chris@82 449 T94 = FMA(KP414213562, T93, T92);
Chris@82 450 T9c = FNMS(KP414213562, T92, T93);
Chris@82 451 }
Chris@82 452 {
Chris@82 453 E T2E, T2H, T4H, T4I;
Chris@82 454 T2E = T2A - T2D;
Chris@82 455 T2H = T2F + T2G;
Chris@82 456 T2I = FNMS(KP707106781, T2H, T2E);
Chris@82 457 T3O = FMA(KP707106781, T2H, T2E);
Chris@82 458 T4H = T2m - T2j;
Chris@82 459 T4I = T2G - T2F;
Chris@82 460 T4J = FNMS(KP707106781, T4I, T4H);
Chris@82 461 T5y = FMA(KP707106781, T4I, T4H);
Chris@82 462 }
Chris@82 463 T8d = T8b - T8c;
Chris@82 464 T8g = T8e - T8f;
Chris@82 465 T8h = FNMS(KP414213562, T8g, T8d);
Chris@82 466 T8u = FMA(KP414213562, T8d, T8g);
Chris@82 467 {
Chris@82 468 E T4E, T4F, T6s, T6z;
Chris@82 469 T4E = T2A + T2D;
Chris@82 470 T4F = T2s + T2x;
Chris@82 471 T4G = FNMS(KP707106781, T4F, T4E);
Chris@82 472 T5z = FMA(KP707106781, T4F, T4E);
Chris@82 473 T6s = TC - TJ;
Chris@82 474 T6z = T6v - T6y;
Chris@82 475 T6A = T6s - T6z;
Chris@82 476 T6Y = T6s + T6z;
Chris@82 477 }
Chris@82 478 }
/* Input group 4: cr[] at indices 2, 6, ..., 30 and ci[] at 1, 5, ..., 29. */
Chris@82 479 {
Chris@82 480 E Ti, T6o, Tl, T6n, T1J, T1O, T80, T7Z, T4x, T4w, Tp, T6l, Ts, T6k, T1y;
Chris@82 481 E T1D, T7X, T7W, T4A, T4z;
Chris@82 482 {
Chris@82 483 E T1K, T1N, T1F, T1I;
Chris@82 484 {
Chris@82 485 E Tg, Th, T1L, T1M;
Chris@82 486 Tg = cr[WS(rs, 2)];
Chris@82 487 Th = ci[WS(rs, 13)];
Chris@82 488 Ti = Tg + Th;
Chris@82 489 T1K = Tg - Th;
Chris@82 490 T1L = ci[WS(rs, 21)];
Chris@82 491 T1M = cr[WS(rs, 26)];
Chris@82 492 T1N = T1L + T1M;
Chris@82 493 T6o = T1L - T1M;
Chris@82 494 }
Chris@82 495 {
Chris@82 496 E Tj, Tk, T1G, T1H;
Chris@82 497 Tj = cr[WS(rs, 10)];
Chris@82 498 Tk = ci[WS(rs, 5)];
Chris@82 499 Tl = Tj + Tk;
Chris@82 500 T1F = Tj - Tk;
Chris@82 501 T1G = ci[WS(rs, 29)];
Chris@82 502 T1H = cr[WS(rs, 18)];
Chris@82 503 T1I = T1G + T1H;
Chris@82 504 T6n = T1G - T1H;
Chris@82 505 }
Chris@82 506 T1J = T1F + T1I;
Chris@82 507 T1O = T1K - T1N;
Chris@82 508 T80 = T6n - T6o;
Chris@82 509 T7Z = Ti - Tl;
Chris@82 510 T4x = T1K + T1N;
Chris@82 511 T4w = T1I - T1F;
Chris@82 512 }
Chris@82 513 {
Chris@82 514 E T1z, T1C, T1u, T1x;
Chris@82 515 {
Chris@82 516 E Tn, To, T1A, T1B;
Chris@82 517 Tn = ci[WS(rs, 1)];
Chris@82 518 To = cr[WS(rs, 14)];
Chris@82 519 Tp = Tn + To;
Chris@82 520 T1z = Tn - To;
Chris@82 521 T1A = ci[WS(rs, 25)];
Chris@82 522 T1B = cr[WS(rs, 22)];
Chris@82 523 T1C = T1A + T1B;
Chris@82 524 T6l = T1A - T1B;
Chris@82 525 }
Chris@82 526 {
Chris@82 527 E Tq, Tr, T1v, T1w;
Chris@82 528 Tq = cr[WS(rs, 6)];
Chris@82 529 Tr = ci[WS(rs, 9)];
Chris@82 530 Ts = Tq + Tr;
Chris@82 531 T1u = Tq - Tr;
Chris@82 532 T1v = ci[WS(rs, 17)];
Chris@82 533 T1w = cr[WS(rs, 30)];
Chris@82 534 T1x = T1v + T1w;
Chris@82 535 T6k = T1v - T1w;
Chris@82 536 }
Chris@82 537 T1y = T1u - T1x;
Chris@82 538 T1D = T1z - T1C;
Chris@82 539 T7X = Tp - Ts;
Chris@82 540 T7W = T6k - T6l;
Chris@82 541 T4A = T1z + T1C;
Chris@82 542 T4z = T1u + T1x;
Chris@82 543 }
Chris@82 544 {
Chris@82 545 E Tm, Tt, T7Y, T81;
Chris@82 546 Tm = Ti + Tl;
Chris@82 547 Tt = Tp + Ts;
Chris@82 548 Tu = Tm + Tt;
Chris@82 549 T6P = Tm - Tt;
Chris@82 550 T7Y = T7W - T7X;
Chris@82 551 T81 = T7Z + T80;
Chris@82 552 T82 = T7Y - T81;
Chris@82 553 T9a = T81 + T7Y;
Chris@82 554 }
Chris@82 555 {
Chris@82 556 E T8q, T8r, T1E, T1P;
Chris@82 557 T8q = T7Z - T80;
Chris@82 558 T8r = T7X + T7W;
Chris@82 559 T8s = T8q - T8r;
Chris@82 560 T8X = T8q + T8r;
Chris@82 561 T1E = FNMS(KP414213562, T1D, T1y);
Chris@82 562 T1P = FMA(KP414213562, T1O, T1J);
Chris@82 563 T1Q = T1E - T1P;
Chris@82 564 T40 = T1P + T1E;
Chris@82 565 }
Chris@82 566 {
Chris@82 567 E T2Y, T2Z, T50, T51;
Chris@82 568 T2Y = FNMS(KP414213562, T1J, T1O);
Chris@82 569 T2Z = FMA(KP414213562, T1y, T1D);
Chris@82 570 T30 = T2Y - T2Z;
Chris@82 571 T3M = T2Y + T2Z;
Chris@82 572 T50 = FMA(KP414213562, T4w, T4x);
Chris@82 573 T51 = FMA(KP414213562, T4z, T4A);
Chris@82 574 T52 = T50 - T51;
Chris@82 575 T5w = T50 + T51;
Chris@82 576 }
Chris@82 577 {
Chris@82 578 E T6m, T6p, T4y, T4B;
Chris@82 579 T6m = T6k + T6l;
Chris@82 580 T6p = T6n + T6o;
Chris@82 581 T6q = T6m - T6p;
Chris@82 582 T7u = T6p + T6m;
Chris@82 583 T4y = FNMS(KP414213562, T4x, T4w);
Chris@82 584 T4B = FNMS(KP414213562, T4A, T4z);
Chris@82 585 T4C = T4y + T4B;
Chris@82 586 T5K = T4B - T4y;
Chris@82 587 }
Chris@82 588 }
/* Outputs 0 and 16. cr[0]/ci[0] are plain sums with no multiplier applied;
 * index 16 is scaled by the pair (T7o, T7s). */
Chris@82 589 {
Chris@82 590 E Tv, T10, T7p, T7v, T7y, T7z, T7q, T7A;
Chris@82 591 Tv = Tf + Tu;
Chris@82 592 T10 = TK + TZ;
Chris@82 593 T7p = Tv - T10;
Chris@82 594 T7v = T7t + T7u;
Chris@82 595 T7y = T7w + T7x;
Chris@82 596 T7z = T7v - T7y;
Chris@82 597 cr[0] = Tv + T10;
Chris@82 598 ci[0] = T7v + T7y;
Chris@82 599 T7q = T7o * T7p;
Chris@82 600 cr[WS(rs, 16)] = FNMS(T7s, T7z, T7q);
Chris@82 601 T7A = T7s * T7p;
Chris@82 602 ci[WS(rs, 16)] = FMA(T7o, T7z, T7A);
Chris@82 603 }
/* Outputs 18 and 2. */
Chris@82 604 {
Chris@82 605 E T9p, T9x, T9v, T9z;
Chris@82 606 {
Chris@82 607 E T9n, T9o, T9t, T9u;
Chris@82 608 T9n = FMA(KP707106781, T8X, T8W);
Chris@82 609 T9o = T9c + T9d;
Chris@82 610 T9p = FNMS(KP923879532, T9o, T9n);
Chris@82 611 T9x = FMA(KP923879532, T9o, T9n);
Chris@82 612 T9t = FMA(KP707106781, T9a, T99);
Chris@82 613 T9u = T94 + T91;
Chris@82 614 T9v = FNMS(KP923879532, T9u, T9t);
Chris@82 615 T9z = FMA(KP923879532, T9u, T9t);
Chris@82 616 }
Chris@82 617 {
Chris@82 618 E T9q, T9w, T9y, T9A;
Chris@82 619 T9q = T9m * T9p;
Chris@82 620 cr[WS(rs, 18)] = FNMS(T9s, T9v, T9q);
Chris@82 621 T9w = T9m * T9v;
Chris@82 622 ci[WS(rs, 18)] = FMA(T9s, T9p, T9w);
Chris@82 623 T9y = T3v * T9x;
Chris@82 624 cr[WS(rs, 2)] = FNMS(T3x, T9z, T9y);
Chris@82 625 T9A = T3v * T9z;
Chris@82 626 ci[WS(rs, 2)] = FMA(T3x, T9x, T9A);
Chris@82 627 }
Chris@82 628 }
/* Outputs 14 and 30. */
Chris@82 629 {
Chris@82 630 E T8H, T8Q, T8N, T8T;
Chris@82 631 {
Chris@82 632 E T8F, T8G, T8L, T8M;
Chris@82 633 T8F = FNMS(KP707106781, T82, T7V);
Chris@82 634 T8G = T8u + T8v;
Chris@82 635 T8H = FNMS(KP923879532, T8G, T8F);
Chris@82 636 T8Q = FMA(KP923879532, T8G, T8F);
Chris@82 637 T8L = FNMS(KP707106781, T8s, T8p);
Chris@82 638 T8M = T8h + T8a;
Chris@82 639 T8N = FNMS(KP923879532, T8M, T8L);
Chris@82 640 T8T = FMA(KP923879532, T8M, T8L);
Chris@82 641 }
Chris@82 642 {
Chris@82 643 E T8I, T8O, T8R, T8U;
Chris@82 644 T8I = T8E * T8H;
Chris@82 645 cr[WS(rs, 14)] = FNMS(T8K, T8N, T8I);
Chris@82 646 T8O = T8E * T8N;
Chris@82 647 ci[WS(rs, 14)] = FMA(T8K, T8H, T8O);
Chris@82 648 T8R = T8P * T8Q;
Chris@82 649 cr[WS(rs, 30)] = FNMS(T8S, T8T, T8R);
Chris@82 650 T8U = T8P * T8T;
Chris@82 651 ci[WS(rs, 30)] = FMA(T8S, T8Q, T8U);
Chris@82 652 }
Chris@82 653 }
/* Outputs 28 and 12. */
Chris@82 654 {
Chris@82 655 E T7b, T7j, T7h, T7l;
Chris@82 656 {
Chris@82 657 E T79, T7a, T7f, T7g;
Chris@82 658 T79 = T6j - T6q;
Chris@82 659 T7a = T6Z - T6Y;
Chris@82 660 T7b = FNMS(KP707106781, T7a, T79);
Chris@82 661 T7j = FMA(KP707106781, T7a, T79);
Chris@82 662 T7f = T6W - T6P;
Chris@82 663 T7g = T6A - T6J;
Chris@82 664 T7h = FNMS(KP707106781, T7g, T7f);
Chris@82 665 T7l = FMA(KP707106781, T7g, T7f);
Chris@82 666 }
Chris@82 667 {
Chris@82 668 E T7c, T7i, T7k, T7m;
Chris@82 669 T7c = T78 * T7b;
Chris@82 670 cr[WS(rs, 28)] = FNMS(T7e, T7h, T7c);
Chris@82 671 T7i = T78 * T7h;
Chris@82 672 ci[WS(rs, 28)] = FMA(T7e, T7b, T7i);
Chris@82 673 T7k = T5X * T7j;
Chris@82 674 cr[WS(rs, 12)] = FNMS(T5Z, T7l, T7k);
Chris@82 675 T7m = T5X * T7l;
Chris@82 676 ci[WS(rs, 12)] = FMA(T5Z, T7j, T7m);
Chris@82 677 }
Chris@82 678 }
/* Outputs 26 and 10. */
Chris@82 679 {
Chris@82 680 E T96, T9h, T9f, T9j;
Chris@82 681 {
Chris@82 682 E T8Y, T95, T9b, T9e;
Chris@82 683 T8Y = FNMS(KP707106781, T8X, T8W);
Chris@82 684 T95 = T91 - T94;
Chris@82 685 T96 = FNMS(KP923879532, T95, T8Y);
Chris@82 686 T9h = FMA(KP923879532, T95, T8Y);
Chris@82 687 T9b = FNMS(KP707106781, T9a, T99);
Chris@82 688 T9e = T9c - T9d;
Chris@82 689 T9f = FNMS(KP923879532, T9e, T9b);
Chris@82 690 T9j = FMA(KP923879532, T9e, T9b);
Chris@82 691 }
Chris@82 692 {
Chris@82 693 E T97, T9g, T9i, T9k;
Chris@82 694 T97 = T8V * T96;
Chris@82 695 cr[WS(rs, 26)] = FNMS(T98, T9f, T97);
Chris@82 696 T9g = T98 * T96;
Chris@82 697 ci[WS(rs, 26)] = FMA(T8V, T9f, T9g);
Chris@82 698 T9i = T3G * T9h;
Chris@82 699 cr[WS(rs, 10)] = FNMS(T3J, T9j, T9i);
Chris@82 700 T9k = T3J * T9h;
Chris@82 701 ci[WS(rs, 10)] = FMA(T3G, T9j, T9k);
Chris@82 702 }
Chris@82 703 }
/* Outputs 20 and 4. */
Chris@82 704 {
Chris@82 705 E T6L, T73, T71, T75;
Chris@82 706 {
Chris@82 707 E T6r, T6K, T6X, T70;
Chris@82 708 T6r = T6j + T6q;
Chris@82 709 T6K = T6A + T6J;
Chris@82 710 T6L = FNMS(KP707106781, T6K, T6r);
Chris@82 711 T73 = FMA(KP707106781, T6K, T6r);
Chris@82 712 T6X = T6P + T6W;
Chris@82 713 T70 = T6Y + T6Z;
Chris@82 714 T71 = FNMS(KP707106781, T70, T6X);
Chris@82 715 T75 = FMA(KP707106781, T70, T6X);
Chris@82 716 }
Chris@82 717 {
Chris@82 718 E T6M, T72, T74, T76;
Chris@82 719 T6M = T6i * T6L;
Chris@82 720 cr[WS(rs, 20)] = FNMS(T6O, T71, T6M);
Chris@82 721 T72 = T6O * T6L;
Chris@82 722 ci[WS(rs, 20)] = FMA(T6i, T71, T72);
Chris@82 723 T74 = T3a * T73;
Chris@82 724 cr[WS(rs, 4)] = FNMS(T3d, T75, T74);
Chris@82 725 T76 = T3d * T73;
Chris@82 726 ci[WS(rs, 4)] = FMA(T3a, T75, T76);
Chris@82 727 }
Chris@82 728 }
/* Outputs 24 and 8. */
Chris@82 729 {
Chris@82 730 E T7F, T7N, T7L, T7P;
Chris@82 731 {
Chris@82 732 E T7D, T7E, T7J, T7K;
Chris@82 733 T7D = Tf - Tu;
Chris@82 734 T7E = T7x - T7w;
Chris@82 735 T7F = T7D - T7E;
Chris@82 736 T7N = T7D + T7E;
Chris@82 737 T7J = T7t - T7u;
Chris@82 738 T7K = TK - TZ;
Chris@82 739 T7L = T7J - T7K;
Chris@82 740 T7P = T7K + T7J;
Chris@82 741 }
Chris@82 742 {
Chris@82 743 E T7G, T7M, T7O, T7Q;
Chris@82 744 T7G = T7C * T7F;
Chris@82 745 cr[WS(rs, 24)] = FNMS(T7I, T7L, T7G);
Chris@82 746 T7M = T7C * T7L;
Chris@82 747 ci[WS(rs, 24)] = FMA(T7I, T7F, T7M);
Chris@82 748 T7O = T4p * T7N;
Chris@82 749 cr[WS(rs, 8)] = FNMS(T4r, T7P, T7O);
Chris@82 750 T7Q = T4p * T7P;
Chris@82 751 ci[WS(rs, 8)] = FMA(T4r, T7N, T7Q);
Chris@82 752 }
Chris@82 753 }
/* Outputs 22 and 6. */
Chris@82 754 {
Chris@82 755 E T8j, T8z, T8x, T8B;
Chris@82 756 {
Chris@82 757 E T83, T8i, T8t, T8w;
Chris@82 758 T83 = FMA(KP707106781, T82, T7V);
Chris@82 759 T8i = T8a - T8h;
Chris@82 760 T8j = FNMS(KP923879532, T8i, T83);
Chris@82 761 T8z = FMA(KP923879532, T8i, T83);
Chris@82 762 T8t = FMA(KP707106781, T8s, T8p);
Chris@82 763 T8w = T8u - T8v;
Chris@82 764 T8x = FNMS(KP923879532, T8w, T8t);
Chris@82 765 T8B = FMA(KP923879532, T8w, T8t);
Chris@82 766 }
Chris@82 767 {
Chris@82 768 E T8k, T8y, T8A, T8C;
Chris@82 769 T8k = T7S * T8j;
Chris@82 770 cr[WS(rs, 22)] = FNMS(T8m, T8x, T8k);
Chris@82 771 T8y = T8m * T8j;
Chris@82 772 ci[WS(rs, 22)] = FMA(T7S, T8x, T8y);
Chris@82 773 T8A = T16 * T8z;
Chris@82 774 cr[WS(rs, 6)] = FNMS(T1a, T8B, T8A);
Chris@82 775 T8C = T1a * T8z;
Chris@82 776 ci[WS(rs, 6)] = FMA(T16, T8B, T8C);
Chris@82 777 }
Chris@82 778 }
/* Outputs 21, 5, 29 and 13. */
Chris@82 779 {
Chris@82 780 E T3r, T2L, T3s, T3f, T35, T3z, T3j, T3o;
Chris@82 781 T3r = FNMS(KP923879532, T30, T2X);
Chris@82 782 {
Chris@82 783 E T1R, T2i, T2J, T2K;
Chris@82 784 T1R = FMA(KP923879532, T1Q, T1t);
Chris@82 785 T2i = FMA(KP668178637, T2h, T28);
Chris@82 786 T2J = FNMS(KP668178637, T2I, T2z);
Chris@82 787 T2K = T2i - T2J;
Chris@82 788 T2L = FNMS(KP831469612, T2K, T1R);
Chris@82 789 T3s = T2J + T2i;
Chris@82 790 T3f = FMA(KP831469612, T2K, T1R);
Chris@82 791 }
Chris@82 792 {
Chris@82 793 E T31, T3m, T34, T3n, T32, T33;
Chris@82 794 T31 = FMA(KP923879532, T30, T2X);
Chris@82 795 T3m = FNMS(KP923879532, T1Q, T1t);
Chris@82 796 T32 = FMA(KP668178637, T2z, T2I);
Chris@82 797 T33 = FNMS(KP668178637, T28, T2h);
Chris@82 798 T34 = T32 - T33;
Chris@82 799 T3n = T32 + T33;
Chris@82 800 T35 = FNMS(KP831469612, T34, T31);
Chris@82 801 T3z = FMA(KP831469612, T3n, T3m);
Chris@82 802 T3j = FMA(KP831469612, T34, T31);
Chris@82 803 T3o = FNMS(KP831469612, T3n, T3m);
Chris@82 804 }
Chris@82 805 {
Chris@82 806 E T2M, T36, T3g, T3k;
Chris@82 807 T2M = T1c * T2L;
Chris@82 808 cr[WS(rs, 21)] = FNMS(T2O, T35, T2M);
Chris@82 809 T36 = T1c * T35;
Chris@82 810 ci[WS(rs, 21)] = FMA(T2O, T2L, T36);
Chris@82 811 T3g = T3e * T3f;
Chris@82 812 cr[WS(rs, 5)] = FNMS(T3i, T3j, T3g);
Chris@82 813 T3k = T3e * T3j;
Chris@82 814 ci[WS(rs, 5)] = FMA(T3i, T3f, T3k);
Chris@82 815 {
Chris@82 816 E T3A, T3E, T3D, T3p, T3u, T3t;
Chris@82 817 T3A = T3y * T3z;
Chris@82 818 T3E = T3C * T3z;
Chris@82 819 T3D = FMA(KP831469612, T3s, T3r);
Chris@82 820 cr[WS(rs, 29)] = FNMS(T3C, T3D, T3A);
Chris@82 821 ci[WS(rs, 29)] = FMA(T3y, T3D, T3E);
Chris@82 822 T3p = T3l * T3o;
Chris@82 823 T3u = T3q * T3o;
Chris@82 824 T3t = FNMS(KP831469612, T3s, T3r);
Chris@82 825 cr[WS(rs, 13)] = FNMS(T3q, T3t, T3p);
Chris@82 826 ci[WS(rs, 13)] = FMA(T3l, T3t, T3u);
Chris@82 827 }
Chris@82 828 }
Chris@82 829 }
/* Outputs 11, 27, 3 and 19. */
Chris@82 830 {
Chris@82 831 E T53, T56, T5p, T5h, T4T, T5r, T59, T5n;
Chris@82 832 T53 = FMA(KP923879532, T52, T4Z);
Chris@82 833 {
Chris@82 834 E T5f, T54, T55, T5g;
Chris@82 835 T5f = FMA(KP923879532, T4C, T4v);
Chris@82 836 T54 = FMA(KP668178637, T4G, T4J);
Chris@82 837 T55 = FMA(KP668178637, T4N, T4Q);
Chris@82 838 T5g = T54 + T55;
Chris@82 839 T56 = T54 - T55;
Chris@82 840 T5p = FMA(KP831469612, T5g, T5f);
Chris@82 841 T5h = FNMS(KP831469612, T5g, T5f);
Chris@82 842 }
Chris@82 843 {
Chris@82 844 E T4D, T5l, T4S, T5m, T4K, T4R;
Chris@82 845 T4D = FNMS(KP923879532, T4C, T4v);
Chris@82 846 T5l = FNMS(KP923879532, T52, T4Z);
Chris@82 847 T4K = FNMS(KP668178637, T4J, T4G);
Chris@82 848 T4R = FNMS(KP668178637, T4Q, T4N);
Chris@82 849 T4S = T4K + T4R;
Chris@82 850 T5m = T4K - T4R;
Chris@82 851 T4T = FNMS(KP831469612, T4S, T4D);
Chris@82 852 T5r = FNMS(KP831469612, T5m, T5l);
Chris@82 853 T59 = FMA(KP831469612, T4S, T4D);
Chris@82 854 T5n = FMA(KP831469612, T5m, T5l);
Chris@82 855 }
Chris@82 856 {
Chris@82 857 E T5i, T5o, T5q, T5s;
Chris@82 858 T5i = T5e * T5h;
Chris@82 859 cr[WS(rs, 11)] = FNMS(T5k, T5n, T5i);
Chris@82 860 T5o = T5e * T5n;
Chris@82 861 ci[WS(rs, 11)] = FMA(T5k, T5h, T5o);
Chris@82 862 T5q = T17 * T5p;
Chris@82 863 cr[WS(rs, 27)] = FNMS(T1b, T5r, T5q);
Chris@82 864 T5s = T17 * T5r;
Chris@82 865 ci[WS(rs, 27)] = FMA(T1b, T5p, T5s);
Chris@82 866 {
Chris@82 867 E T5a, T5c, T5b, T4U, T58, T57;
Chris@82 868 T5a = T11 * T59;
Chris@82 869 T5c = T14 * T59;
Chris@82 870 T5b = FMA(KP831469612, T56, T53);
Chris@82 871 cr[WS(rs, 3)] = FNMS(T14, T5b, T5a);
Chris@82 872 ci[WS(rs, 3)] = FMA(T11, T5b, T5c);
Chris@82 873 T4U = T4s * T4T;
Chris@82 874 T58 = T4W * T4T;
Chris@82 875 T57 = FNMS(KP831469612, T56, T53);
Chris@82 876 cr[WS(rs, 19)] = FNMS(T4W, T57, T4U);
Chris@82 877 ci[WS(rs, 19)] = FMA(T4s, T57, T58);
Chris@82 878 }
Chris@82 879 }
Chris@82 880 }
/* Outputs 25, 9, 1 and 17. */
Chris@82 881 {
Chris@82 882 E T41, T44, T4l, T4e, T3V, T4n, T47, T4j;
Chris@82 883 T41 = FMA(KP923879532, T40, T3Z);
Chris@82 884 {
Chris@82 885 E T4c, T42, T43, T4d;
Chris@82 886 T4c = FNMS(KP923879532, T3M, T3L);
Chris@82 887 T42 = FMA(KP198912367, T3O, T3P);
Chris@82 888 T43 = FNMS(KP198912367, T3R, T3S);
Chris@82 889 T4d = T43 - T42;
Chris@82 890 T44 = T42 + T43;
Chris@82 891 T4l = FMA(KP980785280, T4d, T4c);
Chris@82 892 T4e = FNMS(KP980785280, T4d, T4c);
Chris@82 893 }
Chris@82 894 {
Chris@82 895 E T3N, T4h, T3U, T4i, T3Q, T3T;
Chris@82 896 T3N = FMA(KP923879532, T3M, T3L);
Chris@82 897 T4h = FNMS(KP923879532, T40, T3Z);
Chris@82 898 T3Q = FNMS(KP198912367, T3P, T3O);
Chris@82 899 T3T = FMA(KP198912367, T3S, T3R);
Chris@82 900 T3U = T3Q + T3T;
Chris@82 901 T4i = T3Q - T3T;
Chris@82 902 T3V = FNMS(KP980785280, T3U, T3N);
Chris@82 903 T4n = FMA(KP980785280, T4i, T4h);
Chris@82 904 T47 = FMA(KP980785280, T3U, T3N);
Chris@82 905 T4j = FNMS(KP980785280, T4i, T4h);
Chris@82 906 }
Chris@82 907 {
Chris@82 908 E T4f, T4k, T4m, T4o;
Chris@82 909 T4f = T4b * T4e;
Chris@82 910 cr[WS(rs, 25)] = FNMS(T4g, T4j, T4f);
Chris@82 911 T4k = T4b * T4j;
Chris@82 912 ci[WS(rs, 25)] = FMA(T4g, T4e, T4k);
Chris@82 913 T4m = T12 * T4l;
Chris@82 914 cr[WS(rs, 9)] = FNMS(T15, T4n, T4m);
Chris@82 915 T4o = T12 * T4n;
Chris@82 916 ci[WS(rs, 9)] = FMA(T15, T4l, T4o);
Chris@82 917 {
Chris@82 918 E T48, T4a, T49, T3W, T46, T45;
Chris@82 919 T48 = T37 * T47;
Chris@82 920 T4a = T39 * T47;
Chris@82 921 T49 = FMA(KP980785280, T44, T41);
Chris@82 922 cr[WS(rs, 1)] = FNMS(T39, T49, T48);
Chris@82 923 ci[WS(rs, 1)] = FMA(T37, T49, T4a);
Chris@82 924 T3W = T3K * T3V;
Chris@82 925 T46 = T3Y * T3V;
Chris@82 926 T45 = FNMS(KP980785280, T44, T41);
Chris@82 927 cr[WS(rs, 17)] = FNMS(T3Y, T45, T3W);
Chris@82 928 ci[WS(rs, 17)] = FMA(T3K, T45, T46);
Chris@82 929 }
Chris@82 930 }
Chris@82 931 }
/* Outputs 15, 31, 7 and 23. */
Chris@82 932 {
Chris@82 933 E T5L, T5O, T6c, T63, T5F, T6f, T5S, T69;
Chris@82 934 T5L = FMA(KP923879532, T5K, T5J);
Chris@82 935 {
Chris@82 936 E T61, T5M, T5N, T62;
Chris@82 937 T61 = FMA(KP923879532, T5w, T5v);
Chris@82 938 T5M = FMA(KP198912367, T5y, T5z);
Chris@82 939 T5N = FMA(KP198912367, T5B, T5C);
Chris@82 940 T62 = T5M + T5N;
Chris@82 941 T5O = T5M - T5N;
Chris@82 942 T6c = FMA(KP980785280, T62, T61);
Chris@82 943 T63 = FNMS(KP980785280, T62, T61);
Chris@82 944 }
Chris@82 945 {
Chris@82 946 E T5x, T67, T5E, T68, T5A, T5D;
Chris@82 947 T5x = FNMS(KP923879532, T5w, T5v);
Chris@82 948 T67 = FNMS(KP923879532, T5K, T5J);
Chris@82 949 T5A = FNMS(KP198912367, T5z, T5y);
Chris@82 950 T5D = FNMS(KP198912367, T5C, T5B);
Chris@82 951 T5E = T5A + T5D;
Chris@82 952 T68 = T5D - T5A;
Chris@82 953 T5F = FMA(KP980785280, T5E, T5x);
Chris@82 954 T6f = FNMS(KP980785280, T68, T67);
Chris@82 955 T5S = FNMS(KP980785280, T5E, T5x);
Chris@82 956 T69 = FMA(KP980785280, T68, T67);
Chris@82 957 }
Chris@82 958 {
Chris@82 959 E T64, T6a, T6d, T6g;
Chris@82 960 T64 = T60 * T63;
Chris@82 961 cr[WS(rs, 15)] = FNMS(T66, T69, T64);
Chris@82 962 T6a = T60 * T69;
Chris@82 963 ci[WS(rs, 15)] = FMA(T66, T63, T6a);
Chris@82 964 T6d = T6b * T6c;
Chris@82 965 cr[WS(rs, 31)] = FNMS(T6e, T6f, T6d);
Chris@82 966 T6g = T6b * T6f;
Chris@82 967 ci[WS(rs, 31)] = FMA(T6e, T6c, T6g);
Chris@82 968 {
Chris@82 969 E T5T, T5W, T5V, T5G, T5Q, T5P;
Chris@82 970 T5T = T5R * T5S;
Chris@82 971 T5W = T5U * T5S;
Chris@82 972 T5V = FMA(KP980785280, T5O, T5L);
Chris@82 973 cr[WS(rs, 7)] = FNMS(T5U, T5V, T5T);
Chris@82 974 ci[WS(rs, 7)] = FMA(T5R, T5V, T5W);
Chris@82 975 T5G = T5u * T5F;
Chris@82 976 T5Q = T5I * T5F;
Chris@82 977 T5P = FNMS(KP980785280, T5O, T5L);
Chris@82 978 cr[WS(rs, 23)] = FNMS(T5I, T5P, T5G);
Chris@82 979 ci[WS(rs, 23)] = FMA(T5u, T5P, T5Q);
Chris@82 980 }
Chris@82 981 }
Chris@82 982 }
Chris@82 983 }
Chris@82 984 }
Chris@82 985 }
Chris@82 986 }
Chris@82 987
/*
 * Twiddle instruction table for the FMA build of hb2_32: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 27, followed by one TW_NEXT
 * entry.  It is referenced by the hc2hc_desc defined after it.
 * NOTE(review): the meaning of the three fields is defined by tw_instr in
 * the included codelet headers -- confirm there.
 */
Chris@82 988 static const tw_instr twinstr[] = {
Chris@82 989 {TW_CEXP, 1, 1},
Chris@82 990 {TW_CEXP, 1, 3},
Chris@82 991 {TW_CEXP, 1, 9},
Chris@82 992 {TW_CEXP, 1, 27},
Chris@82 993 {TW_NEXT, 1, 0}
Chris@82 994 };
Chris@82 995
/*
 * Codelet descriptor for the FMA build: size 32, name "hb2_32", the twiddle
 * table above, the GENUS object, and the counts {236, 98, 252, 0}, which
 * match the "236 additions, 98 multiplications, 252 fused multiply/add"
 * figures in the generator comment at the top of this branch.
 */
Chris@82 996 static const hc2hc_desc desc = { 32, "hb2_32", twinstr, &GENUS, {236, 98, 252, 0} };
Chris@82 997
/*
 * Registration entry point (FMA build): hands the hb2_32 function and its
 * descriptor to X(khc2hc_register) together with the planner p.
 */
Chris@82 998 void X(codelet_hb2_32) (planner *p) {
Chris@82 999 X(khc2hc_register) (p, hb2_32, &desc);
Chris@82 1000 }
Chris@82 1001 #else
Chris@82 1002
Chris@82 1003 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hb2_32 -include rdft/scalar/hb.h */
Chris@82 1004
Chris@82 1005 /*
Chris@82 1006 * This function contains 488 FP additions, 280 FP multiplications,
Chris@82 1007 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
Chris@82 1008 * 160 stack variables, 7 constants, and 128 memory accesses
Chris@82 1009 */
Chris@82 1010 #include "rdft/scalar/hb.h"
Chris@82 1011
/*
 * hb2_32, non-FMA variant: size-32 codelet emitted by gen_hc2hc (see the
 * generator command line recorded at the top of this #else branch:
 * -sign 1, -dif, -twiddle-log3, -n 32).
 *
 * cr, ci : data arrays.  In each iteration every element cr[WS(rs, k)] and
 *          ci[WS(rs, k)] for k = 0..31 is read once and then overwritten
 *          in place (64 loads + 64 stores).
 * W      : multiplier table.  It is first advanced by (mb - 1) * 8; each
 *          iteration reads W[0]..W[7] and then advances W by 8.
 * rs     : stride handed to WS() for element indexing.
 * mb, me : the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms     : per-iteration step; cr moves forward by ms, ci moves backward
 *          by ms.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from the included codelet headers; FMA(a, b, c)
 * and FNMS(a, b, c) are presumably a*b + c and c - a*b -- confirm there.
 */
Chris@82 1012 static void hb2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 1013 {
/*
 * Numeric constants (name encodes the leading digits):
 * sin/cos of 3*pi/16, pi/16, pi/8 and sqrt(1/2).
 */
Chris@82 1014 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 1015 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 1016 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 1017 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 1018 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 1019 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 1020 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1021 {
Chris@82 1022 INT m;
/* One pass of the body per m; cr, ci and W are stepped in the loop header. */
Chris@82 1023 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 1024 E T11, T14, T12, T15, T17, T2z, T2B, T1c, T18, T1d, T1g, T1k, T2F, T2L, T3t;
Chris@82 1025 E T4H, T3h, T3V, T3b, T4v, T4T, T4X, T6t, T71, T6z, T75, T81, T8x, T8f, T8z;
Chris@82 1026 E T2R, T2V, T8p, T8t, T4r, T4t, T53, T69, T3n, T3r, T7P, T7T, T4P, T4R, T6F;
Chris@82 1027 E T6R, T1f, T2X, T1j, T2Y, T1l, T31, T2d, T2Z, T49, T4h, T4c, T4i, T4d, T4n;
Chris@82 1028 E T4f, T4j;
/*
 * Multiplier stage.  The pairs T11/T14, T12/T15, T18/T1d and T1g/T1k are
 * W[0..7]; they are applied directly to outputs 1, 3, 9 and 27 below.
 * Sums and differences of their products form the multiplier pairs used
 * for every other output except output 0.
 */
Chris@82 1029 {
Chris@82 1030 E T2P, T3q, T2U, T3l, T2Q, T3p, T2T, T3m, T2D, T3g, T2K, T39, T2E, T3f, T2J;
Chris@82 1031 E T3a;
Chris@82 1032 {
Chris@82 1033 E T13, T1b, T16, T1a;
Chris@82 1034 T11 = W[0];
Chris@82 1035 T14 = W[1];
Chris@82 1036 T12 = W[2];
Chris@82 1037 T15 = W[3];
Chris@82 1038 T13 = T11 * T12;
Chris@82 1039 T1b = T14 * T12;
Chris@82 1040 T16 = T14 * T15;
Chris@82 1041 T1a = T11 * T15;
Chris@82 1042 T17 = T13 + T16;
Chris@82 1043 T2z = T13 - T16;
Chris@82 1044 T2B = T1a + T1b;
Chris@82 1045 T1c = T1a - T1b;
Chris@82 1046 T18 = W[4];
Chris@82 1047 T2P = T12 * T18;
Chris@82 1048 T3q = T14 * T18;
Chris@82 1049 T2U = T15 * T18;
Chris@82 1050 T3l = T11 * T18;
Chris@82 1051 T1d = W[5];
Chris@82 1052 T2Q = T15 * T1d;
Chris@82 1053 T3p = T11 * T1d;
Chris@82 1054 T2T = T12 * T1d;
Chris@82 1055 T3m = T14 * T1d;
Chris@82 1056 T1g = W[6];
Chris@82 1057 T2D = T11 * T1g;
Chris@82 1058 T3g = T15 * T1g;
Chris@82 1059 T2K = T14 * T1g;
Chris@82 1060 T39 = T12 * T1g;
Chris@82 1061 T1k = W[7];
Chris@82 1062 T2E = T14 * T1k;
Chris@82 1063 T3f = T12 * T1k;
Chris@82 1064 T2J = T11 * T1k;
Chris@82 1065 T3a = T15 * T1k;
Chris@82 1066 }
Chris@82 1067 T2F = T2D - T2E;
Chris@82 1068 T2L = T2J + T2K;
Chris@82 1069 T3t = T39 - T3a;
Chris@82 1070 T4H = T2J - T2K;
Chris@82 1071 T3h = T3f - T3g;
Chris@82 1072 T3V = T3f + T3g;
Chris@82 1073 T3b = T39 + T3a;
Chris@82 1074 T4v = T2D + T2E;
Chris@82 1075 T4T = FMA(T18, T1g, T1d * T1k);
Chris@82 1076 T4X = FNMS(T1d, T1g, T18 * T1k);
Chris@82 1077 {
Chris@82 1078 E T6r, T6s, T6x, T6y;
Chris@82 1079 T6r = T17 * T1g;
Chris@82 1080 T6s = T1c * T1k;
Chris@82 1081 T6t = T6r - T6s;
Chris@82 1082 T71 = T6r + T6s;
Chris@82 1083 T6x = T17 * T1k;
Chris@82 1084 T6y = T1c * T1g;
Chris@82 1085 T6z = T6x + T6y;
Chris@82 1086 T75 = T6x - T6y;
Chris@82 1087 }
Chris@82 1088 {
Chris@82 1089 E T7Z, T80, T8d, T8e;
Chris@82 1090 T7Z = T2z * T1g;
Chris@82 1091 T80 = T2B * T1k;
Chris@82 1092 T81 = T7Z + T80;
Chris@82 1093 T8x = T7Z - T80;
Chris@82 1094 T8d = T2z * T1k;
Chris@82 1095 T8e = T2B * T1g;
Chris@82 1096 T8f = T8d - T8e;
Chris@82 1097 T8z = T8d + T8e;
Chris@82 1098 T2R = T2P - T2Q;
Chris@82 1099 T2V = T2T + T2U;
Chris@82 1100 T8p = FMA(T2R, T1g, T2V * T1k);
Chris@82 1101 T8t = FNMS(T2V, T1g, T2R * T1k);
Chris@82 1102 }
Chris@82 1103 T4r = T2P + T2Q;
Chris@82 1104 T4t = T2T - T2U;
Chris@82 1105 T53 = FMA(T4r, T1g, T4t * T1k);
Chris@82 1106 T69 = FNMS(T4t, T1g, T4r * T1k);
Chris@82 1107 T3n = T3l + T3m;
Chris@82 1108 T3r = T3p - T3q;
Chris@82 1109 T7P = FMA(T3n, T1g, T3r * T1k);
Chris@82 1110 T7T = FNMS(T3r, T1g, T3n * T1k);
Chris@82 1111 T4P = T3l - T3m;
Chris@82 1112 T4R = T3p + T3q;
Chris@82 1113 T6F = FMA(T4P, T1g, T4R * T1k);
Chris@82 1114 T6R = FNMS(T4R, T1g, T4P * T1k);
Chris@82 1115 {
Chris@82 1116 E T19, T1e, T1h, T1i;
Chris@82 1117 T19 = T17 * T18;
Chris@82 1118 T1e = T1c * T1d;
Chris@82 1119 T1f = T19 + T1e;
Chris@82 1120 T2X = T19 - T1e;
Chris@82 1121 T1h = T17 * T1d;
Chris@82 1122 T1i = T1c * T18;
Chris@82 1123 T1j = T1h - T1i;
Chris@82 1124 T2Y = T1h + T1i;
Chris@82 1125 }
Chris@82 1126 T1l = FMA(T1f, T1g, T1j * T1k);
Chris@82 1127 T31 = FNMS(T2Y, T1g, T2X * T1k);
Chris@82 1128 T2d = FNMS(T1j, T1g, T1f * T1k);
Chris@82 1129 T2Z = FMA(T2X, T1g, T2Y * T1k);
Chris@82 1130 {
Chris@82 1131 E T47, T48, T4a, T4b;
Chris@82 1132 T47 = T2z * T18;
Chris@82 1133 T48 = T2B * T1d;
Chris@82 1134 T49 = T47 - T48;
Chris@82 1135 T4h = T47 + T48;
Chris@82 1136 T4a = T2z * T1d;
Chris@82 1137 T4b = T2B * T18;
Chris@82 1138 T4c = T4a + T4b;
Chris@82 1139 T4i = T4a - T4b;
Chris@82 1140 }
Chris@82 1141 T4d = FMA(T49, T1g, T4c * T1k);
Chris@82 1142 T4n = FNMS(T4i, T1g, T4h * T1k);
Chris@82 1143 T4f = FNMS(T4c, T1g, T49 * T1k);
Chris@82 1144 T4j = FMA(T4h, T1g, T4i * T1k);
Chris@82 1145 }
/*
 * Data stage: four groups of 16 loads each (8 from cr, 8 from ci) are
 * reduced to sums and differences, then nine store groups write the
 * 32 cr/ci output pairs.
 */
Chris@82 1146 {
Chris@82 1147 E T56, T7b, T7C, T6c, Tf, T1m, T6f, T7c, T3Y, T4I, T2t, T32, T5d, T7D, T3w;
Chris@82 1148 E T4w, Tu, T2e, T7g, T7F, T7j, T7G, T1B, T33, T3z, T40, T5l, T6i, T5s, T6h;
Chris@82 1149 E T3C, T3Z, TK, T1D, T7v, T86, T7y, T85, T1S, T35, T3O, T4C, T5F, T6J, T5M;
Chris@82 1150 E T6K, T3R, T4D, TZ, T1U, T7o, T89, T7r, T88, T29, T36, T3H, T4z, T5Y, T6M;
Chris@82 1151 E T65, T6N, T3K, T4A;
/* Load group 1: cr[0, 4, 8, 12, 16, 20, 24, 28] and ci[3, 7, 11, 15, 19, 23, 27, 31]. */
Chris@82 1152 {
Chris@82 1153 E T3, T54, T2o, T58, T2r, T5b, T6, T6a, Ta, T57, T2h, T6b, T2k, T55, Td;
Chris@82 1154 E T5a;
Chris@82 1155 {
Chris@82 1156 E T1, T2, T2m, T2n;
Chris@82 1157 T1 = cr[0];
Chris@82 1158 T2 = ci[WS(rs, 15)];
Chris@82 1159 T3 = T1 + T2;
Chris@82 1160 T54 = T1 - T2;
Chris@82 1161 T2m = ci[WS(rs, 27)];
Chris@82 1162 T2n = cr[WS(rs, 20)];
Chris@82 1163 T2o = T2m - T2n;
Chris@82 1164 T58 = T2m + T2n;
Chris@82 1165 }
Chris@82 1166 {
Chris@82 1167 E T2p, T2q, T4, T5;
Chris@82 1168 T2p = ci[WS(rs, 19)];
Chris@82 1169 T2q = cr[WS(rs, 28)];
Chris@82 1170 T2r = T2p - T2q;
Chris@82 1171 T5b = T2p + T2q;
Chris@82 1172 T4 = cr[WS(rs, 8)];
Chris@82 1173 T5 = ci[WS(rs, 7)];
Chris@82 1174 T6 = T4 + T5;
Chris@82 1175 T6a = T4 - T5;
Chris@82 1176 }
Chris@82 1177 {
Chris@82 1178 E T8, T9, T2f, T2g;
Chris@82 1179 T8 = cr[WS(rs, 4)];
Chris@82 1180 T9 = ci[WS(rs, 11)];
Chris@82 1181 Ta = T8 + T9;
Chris@82 1182 T57 = T8 - T9;
Chris@82 1183 T2f = ci[WS(rs, 31)];
Chris@82 1184 T2g = cr[WS(rs, 16)];
Chris@82 1185 T2h = T2f - T2g;
Chris@82 1186 T6b = T2f + T2g;
Chris@82 1187 }
Chris@82 1188 {
Chris@82 1189 E T2i, T2j, Tb, Tc;
Chris@82 1190 T2i = ci[WS(rs, 23)];
Chris@82 1191 T2j = cr[WS(rs, 24)];
Chris@82 1192 T2k = T2i - T2j;
Chris@82 1193 T55 = T2i + T2j;
Chris@82 1194 Tb = ci[WS(rs, 3)];
Chris@82 1195 Tc = cr[WS(rs, 12)];
Chris@82 1196 Td = Tb + Tc;
Chris@82 1197 T5a = Tb - Tc;
Chris@82 1198 }
Chris@82 1199 {
Chris@82 1200 E T7, Te, T2l, T2s;
Chris@82 1201 T56 = T54 - T55;
Chris@82 1202 T7b = T54 + T55;
Chris@82 1203 T7C = T6b - T6a;
Chris@82 1204 T6c = T6a + T6b;
Chris@82 1205 T7 = T3 + T6;
Chris@82 1206 Te = Ta + Td;
Chris@82 1207 Tf = T7 + Te;
Chris@82 1208 T1m = T7 - Te;
Chris@82 1209 {
Chris@82 1210 E T6d, T6e, T3W, T3X;
Chris@82 1211 T6d = T57 + T58;
Chris@82 1212 T6e = T5a + T5b;
Chris@82 1213 T6f = KP707106781 * (T6d - T6e);
Chris@82 1214 T7c = KP707106781 * (T6d + T6e);
Chris@82 1215 T3W = T2h - T2k;
Chris@82 1216 T3X = Ta - Td;
Chris@82 1217 T3Y = T3W - T3X;
Chris@82 1218 T4I = T3X + T3W;
Chris@82 1219 }
Chris@82 1220 T2l = T2h + T2k;
Chris@82 1221 T2s = T2o + T2r;
Chris@82 1222 T2t = T2l - T2s;
Chris@82 1223 T32 = T2l + T2s;
Chris@82 1224 {
Chris@82 1225 E T59, T5c, T3u, T3v;
Chris@82 1226 T59 = T57 - T58;
Chris@82 1227 T5c = T5a - T5b;
Chris@82 1228 T5d = KP707106781 * (T59 + T5c);
Chris@82 1229 T7D = KP707106781 * (T59 - T5c);
Chris@82 1230 T3u = T3 - T6;
Chris@82 1231 T3v = T2r - T2o;
Chris@82 1232 T3w = T3u - T3v;
Chris@82 1233 T4w = T3u + T3v;
Chris@82 1234 }
Chris@82 1235 }
Chris@82 1236 }
/* Load group 2: cr[2, 6, 10, 14, 18, 22, 26, 30] and ci[1, 5, 9, 13, 17, 21, 25, 29]. */
Chris@82 1237 {
Chris@82 1238 E Ti, T5p, T1w, T5n, T1z, T5q, Tl, T5m, Tp, T5i, T1p, T5g, T1s, T5j, Ts;
Chris@82 1239 E T5f;
Chris@82 1240 {
Chris@82 1241 E Tg, Th, T1u, T1v;
Chris@82 1242 Tg = cr[WS(rs, 2)];
Chris@82 1243 Th = ci[WS(rs, 13)];
Chris@82 1244 Ti = Tg + Th;
Chris@82 1245 T5p = Tg - Th;
Chris@82 1246 T1u = ci[WS(rs, 29)];
Chris@82 1247 T1v = cr[WS(rs, 18)];
Chris@82 1248 T1w = T1u - T1v;
Chris@82 1249 T5n = T1u + T1v;
Chris@82 1250 }
Chris@82 1251 {
Chris@82 1252 E T1x, T1y, Tj, Tk;
Chris@82 1253 T1x = ci[WS(rs, 21)];
Chris@82 1254 T1y = cr[WS(rs, 26)];
Chris@82 1255 T1z = T1x - T1y;
Chris@82 1256 T5q = T1x + T1y;
Chris@82 1257 Tj = cr[WS(rs, 10)];
Chris@82 1258 Tk = ci[WS(rs, 5)];
Chris@82 1259 Tl = Tj + Tk;
Chris@82 1260 T5m = Tj - Tk;
Chris@82 1261 }
Chris@82 1262 {
Chris@82 1263 E Tn, To, T1n, T1o;
Chris@82 1264 Tn = ci[WS(rs, 1)];
Chris@82 1265 To = cr[WS(rs, 14)];
Chris@82 1266 Tp = Tn + To;
Chris@82 1267 T5i = Tn - To;
Chris@82 1268 T1n = ci[WS(rs, 17)];
Chris@82 1269 T1o = cr[WS(rs, 30)];
Chris@82 1270 T1p = T1n - T1o;
Chris@82 1271 T5g = T1n + T1o;
Chris@82 1272 }
Chris@82 1273 {
Chris@82 1274 E T1q, T1r, Tq, Tr;
Chris@82 1275 T1q = ci[WS(rs, 25)];
Chris@82 1276 T1r = cr[WS(rs, 22)];
Chris@82 1277 T1s = T1q - T1r;
Chris@82 1278 T5j = T1q + T1r;
Chris@82 1279 Tq = cr[WS(rs, 6)];
Chris@82 1280 Tr = ci[WS(rs, 9)];
Chris@82 1281 Ts = Tq + Tr;
Chris@82 1282 T5f = Tq - Tr;
Chris@82 1283 }
Chris@82 1284 {
Chris@82 1285 E Tm, Tt, T7e, T7f;
Chris@82 1286 Tm = Ti + Tl;
Chris@82 1287 Tt = Tp + Ts;
Chris@82 1288 Tu = Tm + Tt;
Chris@82 1289 T2e = Tm - Tt;
Chris@82 1290 T7e = T5p + T5q;
Chris@82 1291 T7f = T5n - T5m;
Chris@82 1292 T7g = FNMS(KP923879532, T7f, KP382683432 * T7e);
Chris@82 1293 T7F = FMA(KP382683432, T7f, KP923879532 * T7e);
Chris@82 1294 }
Chris@82 1295 {
Chris@82 1296 E T7h, T7i, T1t, T1A;
Chris@82 1297 T7h = T5i + T5j;
Chris@82 1298 T7i = T5f + T5g;
Chris@82 1299 T7j = FNMS(KP923879532, T7i, KP382683432 * T7h);
Chris@82 1300 T7G = FMA(KP382683432, T7i, KP923879532 * T7h);
Chris@82 1301 T1t = T1p + T1s;
Chris@82 1302 T1A = T1w + T1z;
Chris@82 1303 T1B = T1t - T1A;
Chris@82 1304 T33 = T1A + T1t;
Chris@82 1305 }
Chris@82 1306 {
Chris@82 1307 E T3x, T3y, T5h, T5k;
Chris@82 1308 T3x = T1p - T1s;
Chris@82 1309 T3y = Tp - Ts;
Chris@82 1310 T3z = T3x - T3y;
Chris@82 1311 T40 = T3y + T3x;
Chris@82 1312 T5h = T5f - T5g;
Chris@82 1313 T5k = T5i - T5j;
Chris@82 1314 T5l = FNMS(KP382683432, T5k, KP923879532 * T5h);
Chris@82 1315 T6i = FMA(KP382683432, T5h, KP923879532 * T5k);
Chris@82 1316 }
Chris@82 1317 {
Chris@82 1318 E T5o, T5r, T3A, T3B;
Chris@82 1319 T5o = T5m + T5n;
Chris@82 1320 T5r = T5p - T5q;
Chris@82 1321 T5s = FMA(KP923879532, T5o, KP382683432 * T5r);
Chris@82 1322 T6h = FNMS(KP382683432, T5o, KP923879532 * T5r);
Chris@82 1323 T3A = Ti - Tl;
Chris@82 1324 T3B = T1w - T1z;
Chris@82 1325 T3C = T3A + T3B;
Chris@82 1326 T3Z = T3A - T3B;
Chris@82 1327 }
Chris@82 1328 }
/* Load group 3: cr[1, 5, 9, 13, 17, 21, 25, 29] and ci[2, 6, 10, 14, 18, 22, 26, 30]. */
Chris@82 1329 {
Chris@82 1330 E Ty, T5v, TB, T5G, T1J, T5w, T1G, T5H, TI, T5K, T1Q, T5D, TF, T5J, T1N;
Chris@82 1331 E T5A;
Chris@82 1332 {
Chris@82 1333 E Tw, Tx, T1E, T1F;
Chris@82 1334 Tw = cr[WS(rs, 1)];
Chris@82 1335 Tx = ci[WS(rs, 14)];
Chris@82 1336 Ty = Tw + Tx;
Chris@82 1337 T5v = Tw - Tx;
Chris@82 1338 {
Chris@82 1339 E Tz, TA, T1H, T1I;
Chris@82 1340 Tz = cr[WS(rs, 9)];
Chris@82 1341 TA = ci[WS(rs, 6)];
Chris@82 1342 TB = Tz + TA;
Chris@82 1343 T5G = Tz - TA;
Chris@82 1344 T1H = ci[WS(rs, 22)];
Chris@82 1345 T1I = cr[WS(rs, 25)];
Chris@82 1346 T1J = T1H - T1I;
Chris@82 1347 T5w = T1H + T1I;
Chris@82 1348 }
Chris@82 1349 T1E = ci[WS(rs, 30)];
Chris@82 1350 T1F = cr[WS(rs, 17)];
Chris@82 1351 T1G = T1E - T1F;
Chris@82 1352 T5H = T1E + T1F;
Chris@82 1353 {
Chris@82 1354 E TG, TH, T5B, T1O, T1P, T5C;
Chris@82 1355 TG = ci[WS(rs, 2)];
Chris@82 1356 TH = cr[WS(rs, 13)];
Chris@82 1357 T5B = TG - TH;
Chris@82 1358 T1O = ci[WS(rs, 18)];
Chris@82 1359 T1P = cr[WS(rs, 29)];
Chris@82 1360 T5C = T1O + T1P;
Chris@82 1361 TI = TG + TH;
Chris@82 1362 T5K = T5B + T5C;
Chris@82 1363 T1Q = T1O - T1P;
Chris@82 1364 T5D = T5B - T5C;
Chris@82 1365 }
Chris@82 1366 {
Chris@82 1367 E TD, TE, T5y, T1L, T1M, T5z;
Chris@82 1368 TD = cr[WS(rs, 5)];
Chris@82 1369 TE = ci[WS(rs, 10)];
Chris@82 1370 T5y = TD - TE;
Chris@82 1371 T1L = ci[WS(rs, 26)];
Chris@82 1372 T1M = cr[WS(rs, 21)];
Chris@82 1373 T5z = T1L + T1M;
Chris@82 1374 TF = TD + TE;
Chris@82 1375 T5J = T5y + T5z;
Chris@82 1376 T1N = T1L - T1M;
Chris@82 1377 T5A = T5y - T5z;
Chris@82 1378 }
Chris@82 1379 }
Chris@82 1380 {
Chris@82 1381 E TC, TJ, T7t, T7u;
Chris@82 1382 TC = Ty + TB;
Chris@82 1383 TJ = TF + TI;
Chris@82 1384 TK = TC + TJ;
Chris@82 1385 T1D = TC - TJ;
Chris@82 1386 T7t = T5H - T5G;
Chris@82 1387 T7u = KP707106781 * (T5A - T5D);
Chris@82 1388 T7v = T7t + T7u;
Chris@82 1389 T86 = T7t - T7u;
Chris@82 1390 }
Chris@82 1391 {
Chris@82 1392 E T7w, T7x, T1K, T1R;
Chris@82 1393 T7w = T5v + T5w;
Chris@82 1394 T7x = KP707106781 * (T5J + T5K);
Chris@82 1395 T7y = T7w - T7x;
Chris@82 1396 T85 = T7w + T7x;
Chris@82 1397 T1K = T1G + T1J;
Chris@82 1398 T1R = T1N + T1Q;
Chris@82 1399 T1S = T1K - T1R;
Chris@82 1400 T35 = T1K + T1R;
Chris@82 1401 }
Chris@82 1402 {
Chris@82 1403 E T3M, T3N, T5x, T5E;
Chris@82 1404 T3M = T1G - T1J;
Chris@82 1405 T3N = TF - TI;
Chris@82 1406 T3O = T3M - T3N;
Chris@82 1407 T4C = T3N + T3M;
Chris@82 1408 T5x = T5v - T5w;
Chris@82 1409 T5E = KP707106781 * (T5A + T5D);
Chris@82 1410 T5F = T5x - T5E;
Chris@82 1411 T6J = T5x + T5E;
Chris@82 1412 }
Chris@82 1413 {
Chris@82 1414 E T5I, T5L, T3P, T3Q;
Chris@82 1415 T5I = T5G + T5H;
Chris@82 1416 T5L = KP707106781 * (T5J - T5K);
Chris@82 1417 T5M = T5I - T5L;
Chris@82 1418 T6K = T5I + T5L;
Chris@82 1419 T3P = Ty - TB;
Chris@82 1420 T3Q = T1Q - T1N;
Chris@82 1421 T3R = T3P - T3Q;
Chris@82 1422 T4D = T3P + T3Q;
Chris@82 1423 }
Chris@82 1424 }
/* Load group 4: cr[3, 7, 11, 15, 19, 23, 27, 31] and ci[0, 4, 8, 12, 16, 20, 24, 28]. */
Chris@82 1425 {
Chris@82 1426 E TN, T5O, TQ, T5Z, T20, T5P, T1X, T60, TX, T63, T27, T5W, TU, T62, T24;
Chris@82 1427 E T5T;
Chris@82 1428 {
Chris@82 1429 E TL, TM, T1V, T1W;
Chris@82 1430 TL = ci[0];
Chris@82 1431 TM = cr[WS(rs, 15)];
Chris@82 1432 TN = TL + TM;
Chris@82 1433 T5O = TL - TM;
Chris@82 1434 {
Chris@82 1435 E TO, TP, T1Y, T1Z;
Chris@82 1436 TO = cr[WS(rs, 7)];
Chris@82 1437 TP = ci[WS(rs, 8)];
Chris@82 1438 TQ = TO + TP;
Chris@82 1439 T5Z = TO - TP;
Chris@82 1440 T1Y = ci[WS(rs, 24)];
Chris@82 1441 T1Z = cr[WS(rs, 23)];
Chris@82 1442 T20 = T1Y - T1Z;
Chris@82 1443 T5P = T1Y + T1Z;
Chris@82 1444 }
Chris@82 1445 T1V = ci[WS(rs, 16)];
Chris@82 1446 T1W = cr[WS(rs, 31)];
Chris@82 1447 T1X = T1V - T1W;
Chris@82 1448 T60 = T1V + T1W;
Chris@82 1449 {
Chris@82 1450 E TV, TW, T5U, T25, T26, T5V;
Chris@82 1451 TV = ci[WS(rs, 4)];
Chris@82 1452 TW = cr[WS(rs, 11)];
Chris@82 1453 T5U = TV - TW;
Chris@82 1454 T25 = ci[WS(rs, 20)];
Chris@82 1455 T26 = cr[WS(rs, 27)];
Chris@82 1456 T5V = T25 + T26;
Chris@82 1457 TX = TV + TW;
Chris@82 1458 T63 = T5U + T5V;
Chris@82 1459 T27 = T25 - T26;
Chris@82 1460 T5W = T5U - T5V;
Chris@82 1461 }
Chris@82 1462 {
Chris@82 1463 E TS, TT, T5R, T22, T23, T5S;
Chris@82 1464 TS = cr[WS(rs, 3)];
Chris@82 1465 TT = ci[WS(rs, 12)];
Chris@82 1466 T5R = TS - TT;
Chris@82 1467 T22 = ci[WS(rs, 28)];
Chris@82 1468 T23 = cr[WS(rs, 19)];
Chris@82 1469 T5S = T22 + T23;
Chris@82 1470 TU = TS + TT;
Chris@82 1471 T62 = T5R + T5S;
Chris@82 1472 T24 = T22 - T23;
Chris@82 1473 T5T = T5R - T5S;
Chris@82 1474 }
Chris@82 1475 }
Chris@82 1476 {
Chris@82 1477 E TR, TY, T7m, T7n;
Chris@82 1478 TR = TN + TQ;
Chris@82 1479 TY = TU + TX;
Chris@82 1480 TZ = TR + TY;
Chris@82 1481 T1U = TR - TY;
Chris@82 1482 T7m = KP707106781 * (T5T - T5W);
Chris@82 1483 T7n = T5Z + T60;
Chris@82 1484 T7o = T7m - T7n;
Chris@82 1485 T89 = T7n + T7m;
Chris@82 1486 }
Chris@82 1487 {
Chris@82 1488 E T7p, T7q, T21, T28;
Chris@82 1489 T7p = T5O + T5P;
Chris@82 1490 T7q = KP707106781 * (T62 + T63);
Chris@82 1491 T7r = T7p - T7q;
Chris@82 1492 T88 = T7p + T7q;
Chris@82 1493 T21 = T1X + T20;
Chris@82 1494 T28 = T24 + T27;
Chris@82 1495 T29 = T21 - T28;
Chris@82 1496 T36 = T21 + T28;
Chris@82 1497 }
Chris@82 1498 {
Chris@82 1499 E T3F, T3G, T5Q, T5X;
Chris@82 1500 T3F = T1X - T20;
Chris@82 1501 T3G = TU - TX;
Chris@82 1502 T3H = T3F - T3G;
Chris@82 1503 T4z = T3G + T3F;
Chris@82 1504 T5Q = T5O - T5P;
Chris@82 1505 T5X = KP707106781 * (T5T + T5W);
Chris@82 1506 T5Y = T5Q - T5X;
Chris@82 1507 T6M = T5Q + T5X;
Chris@82 1508 }
Chris@82 1509 {
Chris@82 1510 E T61, T64, T3I, T3J;
Chris@82 1511 T61 = T5Z - T60;
Chris@82 1512 T64 = KP707106781 * (T62 - T63);
Chris@82 1513 T65 = T61 - T64;
Chris@82 1514 T6N = T61 + T64;
Chris@82 1515 T3I = TN - TQ;
Chris@82 1516 T3J = T27 - T24;
Chris@82 1517 T3K = T3I - T3J;
Chris@82 1518 T4A = T3I + T3J;
Chris@82 1519 }
Chris@82 1520 }
/* Stores 0 and 16.  cr[0] and ci[0] are plain sums with no multiplier applied. */
Chris@82 1521 {
Chris@82 1522 E Tv, T10, T30, T34, T37, T38;
Chris@82 1523 Tv = Tf + Tu;
Chris@82 1524 T10 = TK + TZ;
Chris@82 1525 T30 = Tv - T10;
Chris@82 1526 T34 = T32 + T33;
Chris@82 1527 T37 = T35 + T36;
Chris@82 1528 T38 = T34 - T37;
Chris@82 1529 cr[0] = Tv + T10;
Chris@82 1530 ci[0] = T34 + T37;
Chris@82 1531 cr[WS(rs, 16)] = FNMS(T31, T38, T2Z * T30);
Chris@82 1532 ci[WS(rs, 16)] = FMA(T31, T30, T2Z * T38);
Chris@82 1533 }
/* Stores 24 and 8. */
Chris@82 1534 {
Chris@82 1535 E T3e, T3o, T3k, T3s;
Chris@82 1536 {
Chris@82 1537 E T3c, T3d, T3i, T3j;
Chris@82 1538 T3c = Tf - Tu;
Chris@82 1539 T3d = T36 - T35;
Chris@82 1540 T3e = T3c - T3d;
Chris@82 1541 T3o = T3c + T3d;
Chris@82 1542 T3i = T32 - T33;
Chris@82 1543 T3j = TK - TZ;
Chris@82 1544 T3k = T3i - T3j;
Chris@82 1545 T3s = T3j + T3i;
Chris@82 1546 }
Chris@82 1547 cr[WS(rs, 24)] = FNMS(T3h, T3k, T3b * T3e);
Chris@82 1548 ci[WS(rs, 24)] = FMA(T3b, T3k, T3h * T3e);
Chris@82 1549 cr[WS(rs, 8)] = FNMS(T3r, T3s, T3n * T3o);
Chris@82 1550 ci[WS(rs, 8)] = FMA(T3n, T3s, T3r * T3o);
Chris@82 1551 }
/* Stores 20, 12, 4 and 28. */
Chris@82 1552 {
Chris@82 1553 E T1C, T2u, T2M, T2G, T2x, T2H, T2b, T2N;
Chris@82 1554 T1C = T1m + T1B;
Chris@82 1555 T2u = T2e + T2t;
Chris@82 1556 T2M = T2t - T2e;
Chris@82 1557 T2G = T1m - T1B;
Chris@82 1558 {
Chris@82 1559 E T2v, T2w, T1T, T2a;
Chris@82 1560 T2v = T1D + T1S;
Chris@82 1561 T2w = T29 - T1U;
Chris@82 1562 T2x = KP707106781 * (T2v + T2w);
Chris@82 1563 T2H = KP707106781 * (T2w - T2v);
Chris@82 1564 T1T = T1D - T1S;
Chris@82 1565 T2a = T1U + T29;
Chris@82 1566 T2b = KP707106781 * (T1T + T2a);
Chris@82 1567 T2N = KP707106781 * (T1T - T2a);
Chris@82 1568 }
Chris@82 1569 {
Chris@82 1570 E T2c, T2y, T2S, T2W;
Chris@82 1571 T2c = T1C - T2b;
Chris@82 1572 T2y = T2u - T2x;
Chris@82 1573 cr[WS(rs, 20)] = FNMS(T2d, T2y, T1l * T2c);
Chris@82 1574 ci[WS(rs, 20)] = FMA(T2d, T2c, T1l * T2y);
Chris@82 1575 T2S = T2G + T2H;
Chris@82 1576 T2W = T2M + T2N;
Chris@82 1577 cr[WS(rs, 12)] = FNMS(T2V, T2W, T2R * T2S);
Chris@82 1578 ci[WS(rs, 12)] = FMA(T2R, T2W, T2V * T2S);
Chris@82 1579 }
Chris@82 1580 {
Chris@82 1581 E T2A, T2C, T2I, T2O;
Chris@82 1582 T2A = T1C + T2b;
Chris@82 1583 T2C = T2u + T2x;
Chris@82 1584 cr[WS(rs, 4)] = FNMS(T2B, T2C, T2z * T2A);
Chris@82 1585 ci[WS(rs, 4)] = FMA(T2B, T2A, T2z * T2C);
Chris@82 1586 T2I = T2G - T2H;
Chris@82 1587 T2O = T2M - T2N;
Chris@82 1588 cr[WS(rs, 28)] = FNMS(T2L, T2O, T2F * T2I);
Chris@82 1589 ci[WS(rs, 28)] = FMA(T2F, T2O, T2L * T2I);
Chris@82 1590 }
Chris@82 1591 }
/* Stores 26, 2, 10 and 18. */
Chris@82 1592 {
Chris@82 1593 E T4y, T4U, T4K, T4Y, T4F, T4Z, T4N, T4V, T4x, T4J;
Chris@82 1594 T4x = KP707106781 * (T3Z + T40);
Chris@82 1595 T4y = T4w - T4x;
Chris@82 1596 T4U = T4w + T4x;
Chris@82 1597 T4J = KP707106781 * (T3C + T3z);
Chris@82 1598 T4K = T4I - T4J;
Chris@82 1599 T4Y = T4I + T4J;
Chris@82 1600 {
Chris@82 1601 E T4B, T4E, T4L, T4M;
Chris@82 1602 T4B = FNMS(KP382683432, T4A, KP923879532 * T4z);
Chris@82 1603 T4E = FMA(KP923879532, T4C, KP382683432 * T4D);
Chris@82 1604 T4F = T4B - T4E;
Chris@82 1605 T4Z = T4E + T4B;
Chris@82 1606 T4L = FNMS(KP382683432, T4C, KP923879532 * T4D);
Chris@82 1607 T4M = FMA(KP382683432, T4z, KP923879532 * T4A);
Chris@82 1608 T4N = T4L - T4M;
Chris@82 1609 T4V = T4L + T4M;
Chris@82 1610 }
Chris@82 1611 {
Chris@82 1612 E T4G, T4O, T51, T52;
Chris@82 1613 T4G = T4y - T4F;
Chris@82 1614 T4O = T4K - T4N;
Chris@82 1615 cr[WS(rs, 26)] = FNMS(T4H, T4O, T4v * T4G);
Chris@82 1616 ci[WS(rs, 26)] = FMA(T4H, T4G, T4v * T4O);
Chris@82 1617 T51 = T4U + T4V;
Chris@82 1618 T52 = T4Y + T4Z;
Chris@82 1619 cr[WS(rs, 2)] = FNMS(T1c, T52, T17 * T51);
Chris@82 1620 ci[WS(rs, 2)] = FMA(T17, T52, T1c * T51);
Chris@82 1621 }
Chris@82 1622 {
Chris@82 1623 E T4Q, T4S, T4W, T50;
Chris@82 1624 T4Q = T4y + T4F;
Chris@82 1625 T4S = T4K + T4N;
Chris@82 1626 cr[WS(rs, 10)] = FNMS(T4R, T4S, T4P * T4Q);
Chris@82 1627 ci[WS(rs, 10)] = FMA(T4R, T4Q, T4P * T4S);
Chris@82 1628 T4W = T4U - T4V;
Chris@82 1629 T50 = T4Y - T4Z;
Chris@82 1630 cr[WS(rs, 18)] = FNMS(T4X, T50, T4T * T4W);
Chris@82 1631 ci[WS(rs, 18)] = FMA(T4T, T50, T4X * T4W);
Chris@82 1632 }
Chris@82 1633 }
/* Stores 30, 6, 14 and 22. */
Chris@82 1634 {
Chris@82 1635 E T3E, T4k, T42, T4o, T3T, T4p, T45, T4l, T3D, T41;
Chris@82 1636 T3D = KP707106781 * (T3z - T3C);
Chris@82 1637 T3E = T3w - T3D;
Chris@82 1638 T4k = T3w + T3D;
Chris@82 1639 T41 = KP707106781 * (T3Z - T40);
Chris@82 1640 T42 = T3Y - T41;
Chris@82 1641 T4o = T3Y + T41;
Chris@82 1642 {
Chris@82 1643 E T3L, T3S, T43, T44;
Chris@82 1644 T3L = FNMS(KP923879532, T3K, KP382683432 * T3H);
Chris@82 1645 T3S = FMA(KP382683432, T3O, KP923879532 * T3R);
Chris@82 1646 T3T = T3L - T3S;
Chris@82 1647 T4p = T3S + T3L;
Chris@82 1648 T43 = FNMS(KP923879532, T3O, KP382683432 * T3R);
Chris@82 1649 T44 = FMA(KP923879532, T3H, KP382683432 * T3K);
Chris@82 1650 T45 = T43 - T44;
Chris@82 1651 T4l = T43 + T44;
Chris@82 1652 }
Chris@82 1653 {
Chris@82 1654 E T3U, T46, T4s, T4u;
Chris@82 1655 T3U = T3E - T3T;
Chris@82 1656 T46 = T42 - T45;
Chris@82 1657 cr[WS(rs, 30)] = FNMS(T3V, T46, T3t * T3U);
Chris@82 1658 ci[WS(rs, 30)] = FMA(T3V, T3U, T3t * T46);
Chris@82 1659 T4s = T4k + T4l;
Chris@82 1660 T4u = T4o + T4p;
Chris@82 1661 cr[WS(rs, 6)] = FNMS(T4t, T4u, T4r * T4s);
Chris@82 1662 ci[WS(rs, 6)] = FMA(T4r, T4u, T4t * T4s);
Chris@82 1663 }
Chris@82 1664 {
Chris@82 1665 E T4e, T4g, T4m, T4q;
Chris@82 1666 T4e = T3E + T3T;
Chris@82 1667 T4g = T42 + T45;
Chris@82 1668 cr[WS(rs, 14)] = FNMS(T4f, T4g, T4d * T4e);
Chris@82 1669 ci[WS(rs, 14)] = FMA(T4f, T4e, T4d * T4g);
Chris@82 1670 T4m = T4k - T4l;
Chris@82 1671 T4q = T4o - T4p;
Chris@82 1672 cr[WS(rs, 22)] = FNMS(T4n, T4q, T4j * T4m);
Chris@82 1673 ci[WS(rs, 22)] = FMA(T4j, T4q, T4n * T4m);
Chris@82 1674 }
Chris@82 1675 }
/* Stores 17, 9, 1 and 25. */
Chris@82 1676 {
Chris@82 1677 E T6I, T72, T6X, T73, T6P, T77, T6U, T76;
Chris@82 1678 {
Chris@82 1679 E T6G, T6H, T6V, T6W;
Chris@82 1680 T6G = T56 + T5d;
Chris@82 1681 T6H = T6h + T6i;
Chris@82 1682 T6I = T6G + T6H;
Chris@82 1683 T72 = T6G - T6H;
Chris@82 1684 T6V = FMA(KP195090322, T6J, KP980785280 * T6K);
Chris@82 1685 T6W = FNMS(KP195090322, T6M, KP980785280 * T6N);
Chris@82 1686 T6X = T6V + T6W;
Chris@82 1687 T73 = T6W - T6V;
Chris@82 1688 }
Chris@82 1689 {
Chris@82 1690 E T6L, T6O, T6S, T6T;
Chris@82 1691 T6L = FNMS(KP195090322, T6K, KP980785280 * T6J);
Chris@82 1692 T6O = FMA(KP980785280, T6M, KP195090322 * T6N);
Chris@82 1693 T6P = T6L + T6O;
Chris@82 1694 T77 = T6L - T6O;
Chris@82 1695 T6S = T6c + T6f;
Chris@82 1696 T6T = T5s + T5l;
Chris@82 1697 T6U = T6S + T6T;
Chris@82 1698 T76 = T6S - T6T;
Chris@82 1699 }
Chris@82 1700 {
Chris@82 1701 E T6Q, T6Y, T79, T7a;
Chris@82 1702 T6Q = T6I - T6P;
Chris@82 1703 T6Y = T6U - T6X;
Chris@82 1704 cr[WS(rs, 17)] = FNMS(T6R, T6Y, T6F * T6Q);
Chris@82 1705 ci[WS(rs, 17)] = FMA(T6R, T6Q, T6F * T6Y);
Chris@82 1706 T79 = T72 + T73;
Chris@82 1707 T7a = T76 + T77;
Chris@82 1708 cr[WS(rs, 9)] = FNMS(T1d, T7a, T18 * T79);
Chris@82 1709 ci[WS(rs, 9)] = FMA(T18, T7a, T1d * T79);
Chris@82 1710 }
Chris@82 1711 {
Chris@82 1712 E T6Z, T70, T74, T78;
Chris@82 1713 T6Z = T6I + T6P;
Chris@82 1714 T70 = T6U + T6X;
Chris@82 1715 cr[WS(rs, 1)] = FNMS(T14, T70, T11 * T6Z);
Chris@82 1716 ci[WS(rs, 1)] = FMA(T14, T6Z, T11 * T70);
Chris@82 1717 T74 = T72 - T73;
Chris@82 1718 T78 = T76 - T77;
Chris@82 1719 cr[WS(rs, 25)] = FNMS(T75, T78, T71 * T74);
Chris@82 1720 ci[WS(rs, 25)] = FMA(T71, T78, T75 * T74);
Chris@82 1721 }
Chris@82 1722 }
/* Stores 23, 31, 7 and 15. */
Chris@82 1723 {
Chris@82 1724 E T84, T8q, T8l, T8r, T8b, T8v, T8i, T8u;
Chris@82 1725 {
Chris@82 1726 E T82, T83, T8j, T8k;
Chris@82 1727 T82 = T7b + T7c;
Chris@82 1728 T83 = T7F + T7G;
Chris@82 1729 T84 = T82 - T83;
Chris@82 1730 T8q = T82 + T83;
Chris@82 1731 T8j = FMA(KP195090322, T86, KP980785280 * T85);
Chris@82 1732 T8k = FMA(KP195090322, T89, KP980785280 * T88);
Chris@82 1733 T8l = T8j - T8k;
Chris@82 1734 T8r = T8j + T8k;
Chris@82 1735 }
Chris@82 1736 {
Chris@82 1737 E T87, T8a, T8g, T8h;
Chris@82 1738 T87 = FNMS(KP980785280, T86, KP195090322 * T85);
Chris@82 1739 T8a = FNMS(KP980785280, T89, KP195090322 * T88);
Chris@82 1740 T8b = T87 + T8a;
Chris@82 1741 T8v = T87 - T8a;
Chris@82 1742 T8g = T7C - T7D;
Chris@82 1743 T8h = T7g - T7j;
Chris@82 1744 T8i = T8g + T8h;
Chris@82 1745 T8u = T8g - T8h;
Chris@82 1746 }
Chris@82 1747 {
Chris@82 1748 E T8c, T8m, T8y, T8A;
Chris@82 1749 T8c = T84 - T8b;
Chris@82 1750 T8m = T8i - T8l;
Chris@82 1751 cr[WS(rs, 23)] = FNMS(T8f, T8m, T81 * T8c);
Chris@82 1752 ci[WS(rs, 23)] = FMA(T8f, T8c, T81 * T8m);
Chris@82 1753 T8y = T8q + T8r;
Chris@82 1754 T8A = T8u - T8v;
Chris@82 1755 cr[WS(rs, 31)] = FNMS(T8z, T8A, T8x * T8y);
Chris@82 1756 ci[WS(rs, 31)] = FMA(T8x, T8A, T8z * T8y);
Chris@82 1757 }
Chris@82 1758 {
Chris@82 1759 E T8n, T8o, T8s, T8w;
Chris@82 1760 T8n = T84 + T8b;
Chris@82 1761 T8o = T8i + T8l;
Chris@82 1762 cr[WS(rs, 7)] = FNMS(T1j, T8o, T1f * T8n);
Chris@82 1763 ci[WS(rs, 7)] = FMA(T1j, T8n, T1f * T8o);
Chris@82 1764 T8s = T8q - T8r;
Chris@82 1765 T8w = T8u + T8v;
Chris@82 1766 cr[WS(rs, 15)] = FNMS(T8t, T8w, T8p * T8s);
Chris@82 1767 ci[WS(rs, 15)] = FMA(T8p, T8w, T8t * T8s);
Chris@82 1768 }
Chris@82 1769 }
/* Stores 21, 13, 5 and 29. */
Chris@82 1770 {
Chris@82 1771 E T5u, T6u, T6n, T6v, T67, T6B, T6k, T6A;
Chris@82 1772 {
Chris@82 1773 E T5e, T5t, T6l, T6m;
Chris@82 1774 T5e = T56 - T5d;
Chris@82 1775 T5t = T5l - T5s;
Chris@82 1776 T5u = T5e + T5t;
Chris@82 1777 T6u = T5e - T5t;
Chris@82 1778 T6l = FMA(KP831469612, T5F, KP555570233 * T5M);
Chris@82 1779 T6m = FNMS(KP831469612, T5Y, KP555570233 * T65);
Chris@82 1780 T6n = T6l + T6m;
Chris@82 1781 T6v = T6m - T6l;
Chris@82 1782 }
Chris@82 1783 {
Chris@82 1784 E T5N, T66, T6g, T6j;
Chris@82 1785 T5N = FNMS(KP831469612, T5M, KP555570233 * T5F);
Chris@82 1786 T66 = FMA(KP555570233, T5Y, KP831469612 * T65);
Chris@82 1787 T67 = T5N + T66;
Chris@82 1788 T6B = T5N - T66;
Chris@82 1789 T6g = T6c - T6f;
Chris@82 1790 T6j = T6h - T6i;
Chris@82 1791 T6k = T6g + T6j;
Chris@82 1792 T6A = T6g - T6j;
Chris@82 1793 }
Chris@82 1794 {
Chris@82 1795 E T68, T6o, T6D, T6E;
Chris@82 1796 T68 = T5u - T67;
Chris@82 1797 T6o = T6k - T6n;
Chris@82 1798 cr[WS(rs, 21)] = FNMS(T69, T6o, T53 * T68);
Chris@82 1799 ci[WS(rs, 21)] = FMA(T69, T68, T53 * T6o);
Chris@82 1800 T6D = T6u + T6v;
Chris@82 1801 T6E = T6A + T6B;
Chris@82 1802 cr[WS(rs, 13)] = FNMS(T4c, T6E, T49 * T6D);
Chris@82 1803 ci[WS(rs, 13)] = FMA(T49, T6E, T4c * T6D);
Chris@82 1804 }
Chris@82 1805 {
Chris@82 1806 E T6p, T6q, T6w, T6C;
Chris@82 1807 T6p = T5u + T67;
Chris@82 1808 T6q = T6k + T6n;
Chris@82 1809 cr[WS(rs, 5)] = FNMS(T4i, T6q, T4h * T6p);
Chris@82 1810 ci[WS(rs, 5)] = FMA(T4i, T6p, T4h * T6q);
Chris@82 1811 T6w = T6u - T6v;
Chris@82 1812 T6C = T6A - T6B;
Chris@82 1813 cr[WS(rs, 29)] = FNMS(T6z, T6C, T6t * T6w);
Chris@82 1814 ci[WS(rs, 29)] = FMA(T6t, T6C, T6z * T6w);
Chris@82 1815 }
Chris@82 1816 }
/* Stores 27, 3, 11 and 19. */
Chris@82 1817 {
Chris@82 1818 E T7l, T7Q, T7L, T7R, T7A, T7V, T7I, T7U;
Chris@82 1819 {
Chris@82 1820 E T7d, T7k, T7J, T7K;
Chris@82 1821 T7d = T7b - T7c;
Chris@82 1822 T7k = T7g + T7j;
Chris@82 1823 T7l = T7d - T7k;
Chris@82 1824 T7Q = T7d + T7k;
Chris@82 1825 T7J = FNMS(KP555570233, T7v, KP831469612 * T7y);
Chris@82 1826 T7K = FMA(KP555570233, T7o, KP831469612 * T7r);
Chris@82 1827 T7L = T7J - T7K;
Chris@82 1828 T7R = T7J + T7K;
Chris@82 1829 }
Chris@82 1830 {
Chris@82 1831 E T7s, T7z, T7E, T7H;
Chris@82 1832 T7s = FNMS(KP555570233, T7r, KP831469612 * T7o);
Chris@82 1833 T7z = FMA(KP831469612, T7v, KP555570233 * T7y);
Chris@82 1834 T7A = T7s - T7z;
Chris@82 1835 T7V = T7z + T7s;
Chris@82 1836 T7E = T7C + T7D;
Chris@82 1837 T7H = T7F - T7G;
Chris@82 1838 T7I = T7E - T7H;
Chris@82 1839 T7U = T7E + T7H;
Chris@82 1840 }
Chris@82 1841 {
Chris@82 1842 E T7B, T7M, T7X, T7Y;
Chris@82 1843 T7B = T7l - T7A;
Chris@82 1844 T7M = T7I - T7L;
Chris@82 1845 cr[WS(rs, 27)] = FNMS(T1k, T7M, T1g * T7B);
Chris@82 1846 ci[WS(rs, 27)] = FMA(T1k, T7B, T1g * T7M);
Chris@82 1847 T7X = T7Q + T7R;
Chris@82 1848 T7Y = T7U + T7V;
Chris@82 1849 cr[WS(rs, 3)] = FNMS(T15, T7Y, T12 * T7X);
Chris@82 1850 ci[WS(rs, 3)] = FMA(T12, T7Y, T15 * T7X);
Chris@82 1851 }
Chris@82 1852 {
Chris@82 1853 E T7N, T7O, T7S, T7W;
Chris@82 1854 T7N = T7l + T7A;
Chris@82 1855 T7O = T7I + T7L;
Chris@82 1856 cr[WS(rs, 11)] = FNMS(T2Y, T7O, T2X * T7N);
Chris@82 1857 ci[WS(rs, 11)] = FMA(T2Y, T7N, T2X * T7O);
Chris@82 1858 T7S = T7Q - T7R;
Chris@82 1859 T7W = T7U - T7V;
Chris@82 1860 cr[WS(rs, 19)] = FNMS(T7T, T7W, T7P * T7S);
Chris@82 1861 ci[WS(rs, 19)] = FMA(T7P, T7W, T7T * T7S);
Chris@82 1862 }
Chris@82 1863 }
Chris@82 1864 }
Chris@82 1865 }
Chris@82 1866 }
Chris@82 1867 }
Chris@82 1868
/*
 * Twiddle instruction table for the non-FMA build of hb2_32: four TW_CEXP
 * entries whose last field is 1, 3, 9 and 27, followed by one TW_NEXT
 * entry.  hb2_32 reads eight values, W[0]..W[7], per iteration and applies
 * the pairs W[0]/W[1], W[2]/W[3], W[4]/W[5], W[6]/W[7] to outputs 1, 3, 9
 * and 27 respectively.
 * NOTE(review): the meaning of the three fields is defined by tw_instr in
 * the included codelet headers -- confirm there.
 */
Chris@82 1869 static const tw_instr twinstr[] = {
Chris@82 1870 {TW_CEXP, 1, 1},
Chris@82 1871 {TW_CEXP, 1, 3},
Chris@82 1872 {TW_CEXP, 1, 9},
Chris@82 1873 {TW_CEXP, 1, 27},
Chris@82 1874 {TW_NEXT, 1, 0}
Chris@82 1875 };
Chris@82 1876
/*
 * Codelet descriptor for the non-FMA build: size 32, name "hb2_32", the
 * twiddle table above, the GENUS object, and the counts {376, 168, 112, 0},
 * which match the "376 additions, 168 multiplications, 112 fused
 * multiply/add" figures in the generator comment at the top of this branch.
 */
Chris@82 1877 static const hc2hc_desc desc = { 32, "hb2_32", twinstr, &GENUS, {376, 168, 112, 0} };
Chris@82 1878
/*
 * Registration entry point (non-FMA build): hands the hb2_32 function and
 * its descriptor to X(khc2hc_register) together with the planner p.
 */
Chris@82 1879 void X(codelet_hb2_32) (planner *p) {
Chris@82 1880 X(khc2hc_register) (p, hb2_32, &desc);
Chris@82 1881 }
Chris@82 1882 #endif