annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cb_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:31 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cb_64 -include r2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 394 FP additions, 216 FP multiplications,
Chris@42 32 * (or, 178 additions, 0 multiplications, 216 fused multiply/add),
Chris@42 33 * 143 stack variables, 18 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cb.h"
Chris@42 36
/*
 * r2cb_64 (HAVE_FMA variant): generated size-64 codelet, produced by
 * gen_r2cb with "-n 64 -sign 1" (see the generator command line above).
 *
 * Per loop iteration this routine
 *   - reads Cr[0] and Cr[WS(csr, k)] for k = 1..32 (33 values),
 *   - reads Ci[WS(csi, k)] for k = 1..31 (31 values; Ci at index 0 and
 *     index 32 are never read),
 *   - writes R0[0], R0[WS(rs, k)] and R1[0], R1[WS(rs, k)] for k = 1..31
 *     (32 values each),
 * which accounts for the 128 memory accesses quoted in the header comment.
 *
 * Parameters:
 *   R0, R1   output arrays, indexed through stride rs
 *   Cr, Ci   input arrays, indexed through strides csr and csi
 *   v        number of loop iterations (the loop body runs while i > 0)
 *   ivs      amount added to Cr and Ci after every iteration
 *   ovs      amount added to R0 and R1 after every iteration
 *
 * NOTE(review): judging by the generator name this is presumably the
 * complex-to-real backward transform, with Cr/Ci the real/imaginary parts
 * of the half spectrum and R0/R1 the even/odd-indexed real outputs --
 * confirm against r2cb.h and the FFTW codelet documentation.
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file; FMA(a, b, c) and
 * FNMS(a, b, c) presumably mean a * b + c and c - a * b -- confirm in the
 * included headers.
 */
Chris@42 37 static void r2cb_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Numeric constants used as multipliers in the FMA/FNMS expressions below. */
Chris@42 39 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@42 40 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@42 41 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@42 42 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@42 43 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@42 44 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@42 45 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@42 46 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@42 47 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 48 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@42 49 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@42 50 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 51 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 52 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 53 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@42 54 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 55 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@42 56 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 57 {
Chris@42 58 INT i;
/* v iterations; after each one the outputs advance by ovs and the inputs by ivs. */
Chris@42 59 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* These eight temporaries are assigned inside the nested block that follows
   and consumed by the last group of R1 stores at the end of the loop body. */
Chris@42 60 E T3d, T32, T37, T2Z, T3f, T3b, T3c, T35;
Chris@42 61 {
Chris@42 62 E T5H, T9, T5j, T4p, T2T, T1b, T3Z, T3j, Tg, T5I, T5k, T4u, T40, T3m, T2U;
Chris@42 63 E T1m, T3o, T1s, T1J, T3r, T5K, Tw, T5N, T6c, T4A, T5n, T3s, T1D, T5m, T4F;
Chris@42 64 E T3p, T1M, T3w, T1U, T2z, T3H, T5Q, TM, T6f, T5Y, T5q, T4M, T3I, T25, T5t;
Chris@42 65 E T53, T3x, T2C, T3A, T5V, T11, T6g, T5T, T55, T4W, T3z, T2E, T2h, T2F, T2s;
Chris@42 66 E T3L, T3E, T54, T4R;
/* Input phase: every Cr/Ci load happens inside this block, interleaved with
   the first sums, differences and FMA/FNMS combinations of the loaded values. */
Chris@42 67 {
Chris@42 68 E Td, T1c, Tc, T4r, T1k, Te, T1d, T1e;
Chris@42 69 {
Chris@42 70 E T3h, T15, T1a, T3i;
Chris@42 71 {
Chris@42 72 E T4, T14, T17, T13, T3, T16, T8, T18;
Chris@42 73 T4 = Cr[WS(csr, 16)];
Chris@42 74 T14 = Ci[WS(csi, 16)];
Chris@42 75 {
Chris@42 76 E T1, T2, T6, T7;
Chris@42 77 T1 = Cr[0];
Chris@42 78 T2 = Cr[WS(csr, 32)];
Chris@42 79 T6 = Cr[WS(csr, 8)];
Chris@42 80 T7 = Cr[WS(csr, 24)];
Chris@42 81 T17 = Ci[WS(csi, 8)];
Chris@42 82 T13 = T1 - T2;
Chris@42 83 T3 = T1 + T2;
Chris@42 84 T16 = T6 - T7;
Chris@42 85 T8 = T6 + T7;
Chris@42 86 T18 = Ci[WS(csi, 24)];
Chris@42 87 }
Chris@42 88 {
Chris@42 89 E T4n, T5, T4o, T19;
Chris@42 90 T4n = FNMS(KP2_000000000, T4, T3);
Chris@42 91 T5 = FMA(KP2_000000000, T4, T3);
Chris@42 92 T3h = FMA(KP2_000000000, T14, T13);
Chris@42 93 T15 = FNMS(KP2_000000000, T14, T13);
Chris@42 94 T4o = T17 - T18;
Chris@42 95 T19 = T17 + T18;
Chris@42 96 T5H = FNMS(KP2_000000000, T8, T5);
Chris@42 97 T9 = FMA(KP2_000000000, T8, T5);
Chris@42 98 T5j = FMA(KP2_000000000, T4o, T4n);
Chris@42 99 T4p = FNMS(KP2_000000000, T4o, T4n);
Chris@42 100 T1a = T16 - T19;
Chris@42 101 T3i = T16 + T19;
Chris@42 102 }
Chris@42 103 }
Chris@42 104 {
Chris@42 105 E Ta, Tb, T1i, T1j;
Chris@42 106 Ta = Cr[WS(csr, 4)];
Chris@42 107 T2T = FNMS(KP1_414213562, T1a, T15);
Chris@42 108 T1b = FMA(KP1_414213562, T1a, T15);
Chris@42 109 T3Z = FMA(KP1_414213562, T3i, T3h);
Chris@42 110 T3j = FNMS(KP1_414213562, T3i, T3h);
Chris@42 111 Tb = Cr[WS(csr, 28)];
Chris@42 112 T1i = Ci[WS(csi, 4)];
Chris@42 113 T1j = Ci[WS(csi, 28)];
Chris@42 114 Td = Cr[WS(csr, 20)];
Chris@42 115 T1c = Ta - Tb;
Chris@42 116 Tc = Ta + Tb;
Chris@42 117 T4r = T1i - T1j;
Chris@42 118 T1k = T1i + T1j;
Chris@42 119 Te = Cr[WS(csr, 12)];
Chris@42 120 T1d = Ci[WS(csi, 20)];
Chris@42 121 T1e = Ci[WS(csi, 12)];
Chris@42 122 }
Chris@42 123 }
Chris@42 124 {
Chris@42 125 E T4B, T4E, T1K, T1L;
Chris@42 126 {
Chris@42 127 E T1o, Tk, T4C, T1I, T1F, Tn, T4D, T1r, Ts, T1t, Tr, T4y, T1w, Tt, T1z;
Chris@42 128 E T1A;
Chris@42 129 {
Chris@42 130 E Tl, Tm, T1p, T1q;
Chris@42 131 {
Chris@42 132 E Ti, Tj, T1G, T1H, T1h, Tf;
Chris@42 133 Ti = Cr[WS(csr, 2)];
Chris@42 134 T1h = Td - Te;
Chris@42 135 Tf = Td + Te;
Chris@42 136 {
Chris@42 137 E T4s, T1f, T3k, T1l;
Chris@42 138 T4s = T1d - T1e;
Chris@42 139 T1f = T1d + T1e;
Chris@42 140 T3k = T1k - T1h;
Chris@42 141 T1l = T1h + T1k;
Chris@42 142 {
Chris@42 143 E T4q, T4t, T3l, T1g;
Chris@42 144 T4q = Tc - Tf;
Chris@42 145 Tg = Tc + Tf;
Chris@42 146 T4t = T4r - T4s;
Chris@42 147 T5I = T4s + T4r;
Chris@42 148 T3l = T1c + T1f;
Chris@42 149 T1g = T1c - T1f;
Chris@42 150 T5k = T4q + T4t;
Chris@42 151 T4u = T4q - T4t;
Chris@42 152 T40 = FMA(KP414213562, T3k, T3l);
Chris@42 153 T3m = FNMS(KP414213562, T3l, T3k);
Chris@42 154 T2U = FMA(KP414213562, T1g, T1l);
Chris@42 155 T1m = FNMS(KP414213562, T1l, T1g);
Chris@42 156 Tj = Cr[WS(csr, 30)];
Chris@42 157 }
Chris@42 158 }
Chris@42 159 T1G = Ci[WS(csi, 2)];
Chris@42 160 T1H = Ci[WS(csi, 30)];
Chris@42 161 Tl = Cr[WS(csr, 18)];
Chris@42 162 T1o = Ti - Tj;
Chris@42 163 Tk = Ti + Tj;
Chris@42 164 T4C = T1G - T1H;
Chris@42 165 T1I = T1G + T1H;
Chris@42 166 Tm = Cr[WS(csr, 14)];
Chris@42 167 T1p = Ci[WS(csi, 18)];
Chris@42 168 T1q = Ci[WS(csi, 14)];
Chris@42 169 }
Chris@42 170 {
Chris@42 171 E Tp, Tq, T1u, T1v;
Chris@42 172 Tp = Cr[WS(csr, 10)];
Chris@42 173 T1F = Tl - Tm;
Chris@42 174 Tn = Tl + Tm;
Chris@42 175 T4D = T1p - T1q;
Chris@42 176 T1r = T1p + T1q;
Chris@42 177 Tq = Cr[WS(csr, 22)];
Chris@42 178 T1u = Ci[WS(csi, 10)];
Chris@42 179 T1v = Ci[WS(csi, 22)];
Chris@42 180 Ts = Cr[WS(csr, 6)];
Chris@42 181 T1t = Tp - Tq;
Chris@42 182 Tr = Tp + Tq;
Chris@42 183 T4y = T1u - T1v;
Chris@42 184 T1w = T1u + T1v;
Chris@42 185 Tt = Cr[WS(csr, 26)];
Chris@42 186 T1z = Ci[WS(csi, 6)];
Chris@42 187 T1A = Ci[WS(csi, 26)];
Chris@42 188 }
Chris@42 189 }
Chris@42 190 {
Chris@42 191 E T1y, T4x, T1B, T4w, To, Tv, Tu;
Chris@42 192 T3o = T1o + T1r;
Chris@42 193 T1s = T1o - T1r;
Chris@42 194 T1y = Ts - Tt;
Chris@42 195 Tu = Ts + Tt;
Chris@42 196 T4x = T1A - T1z;
Chris@42 197 T1B = T1z + T1A;
Chris@42 198 T1J = T1F + T1I;
Chris@42 199 T3r = T1I - T1F;
Chris@42 200 T4w = Tk - Tn;
Chris@42 201 To = Tk + Tn;
Chris@42 202 Tv = Tr + Tu;
Chris@42 203 T4B = Tr - Tu;
Chris@42 204 {
Chris@42 205 E T4z, T5L, T5M, T1x, T1C;
Chris@42 206 T4E = T4C - T4D;
Chris@42 207 T5L = T4D + T4C;
Chris@42 208 T5M = T4y + T4x;
Chris@42 209 T4z = T4x - T4y;
Chris@42 210 T5K = To - Tv;
Chris@42 211 Tw = To + Tv;
Chris@42 212 T5N = T5L - T5M;
Chris@42 213 T6c = T5M + T5L;
Chris@42 214 T1K = T1t + T1w;
Chris@42 215 T1x = T1t - T1w;
Chris@42 216 T1C = T1y - T1B;
Chris@42 217 T1L = T1y + T1B;
Chris@42 218 T4A = T4w + T4z;
Chris@42 219 T5n = T4w - T4z;
Chris@42 220 T3s = T1C - T1x;
Chris@42 221 T1D = T1x + T1C;
Chris@42 222 }
Chris@42 223 }
Chris@42 224 }
Chris@42 225 {
Chris@42 226 E T4Z, T52, T2A, T2B;
Chris@42 227 {
Chris@42 228 E T1Q, TA, T50, T2y, T2v, TD, T51, T1T, TI, T1V, TH, T4K, T1Y, TJ, T21;
Chris@42 229 E T22;
Chris@42 230 {
Chris@42 231 E TB, TC, T1R, T1S;
Chris@42 232 {
Chris@42 233 E Ty, Tz, T2w, T2x;
Chris@42 234 Ty = Cr[WS(csr, 1)];
Chris@42 235 T5m = T4E - T4B;
Chris@42 236 T4F = T4B + T4E;
Chris@42 237 T3p = T1K + T1L;
Chris@42 238 T1M = T1K - T1L;
Chris@42 239 Tz = Cr[WS(csr, 31)];
Chris@42 240 T2w = Ci[WS(csi, 1)];
Chris@42 241 T2x = Ci[WS(csi, 31)];
Chris@42 242 TB = Cr[WS(csr, 17)];
Chris@42 243 T1Q = Ty - Tz;
Chris@42 244 TA = Ty + Tz;
Chris@42 245 T50 = T2w - T2x;
Chris@42 246 T2y = T2w + T2x;
Chris@42 247 TC = Cr[WS(csr, 15)];
Chris@42 248 T1R = Ci[WS(csi, 17)];
Chris@42 249 T1S = Ci[WS(csi, 15)];
Chris@42 250 }
Chris@42 251 {
Chris@42 252 E TF, TG, T1W, T1X;
Chris@42 253 TF = Cr[WS(csr, 9)];
Chris@42 254 T2v = TB - TC;
Chris@42 255 TD = TB + TC;
Chris@42 256 T51 = T1R - T1S;
Chris@42 257 T1T = T1R + T1S;
Chris@42 258 TG = Cr[WS(csr, 23)];
Chris@42 259 T1W = Ci[WS(csi, 9)];
Chris@42 260 T1X = Ci[WS(csi, 23)];
Chris@42 261 TI = Cr[WS(csr, 7)];
Chris@42 262 T1V = TF - TG;
Chris@42 263 TH = TF + TG;
Chris@42 264 T4K = T1W - T1X;
Chris@42 265 T1Y = T1W + T1X;
Chris@42 266 TJ = Cr[WS(csr, 25)];
Chris@42 267 T21 = Ci[WS(csi, 7)];
Chris@42 268 T22 = Ci[WS(csi, 25)];
Chris@42 269 }
Chris@42 270 }
Chris@42 271 {
Chris@42 272 E T20, T4J, T23, T4I, TE, TL, TK;
Chris@42 273 T3w = T1Q + T1T;
Chris@42 274 T1U = T1Q - T1T;
Chris@42 275 T20 = TI - TJ;
Chris@42 276 TK = TI + TJ;
Chris@42 277 T4J = T22 - T21;
Chris@42 278 T23 = T21 + T22;
Chris@42 279 T2z = T2v + T2y;
Chris@42 280 T3H = T2y - T2v;
Chris@42 281 T4I = TA - TD;
Chris@42 282 TE = TA + TD;
Chris@42 283 TL = TH + TK;
Chris@42 284 T4Z = TH - TK;
Chris@42 285 {
Chris@42 286 E T4L, T5W, T5X, T1Z, T24;
Chris@42 287 T52 = T50 - T51;
Chris@42 288 T5W = T51 + T50;
Chris@42 289 T5X = T4K + T4J;
Chris@42 290 T4L = T4J - T4K;
Chris@42 291 T5Q = TE - TL;
Chris@42 292 TM = TE + TL;
Chris@42 293 T6f = T5X + T5W;
Chris@42 294 T5Y = T5W - T5X;
Chris@42 295 T2A = T1V + T1Y;
Chris@42 296 T1Z = T1V - T1Y;
Chris@42 297 T24 = T20 - T23;
Chris@42 298 T2B = T20 + T23;
Chris@42 299 T5q = T4I - T4L;
Chris@42 300 T4M = T4I + T4L;
Chris@42 301 T3I = T24 - T1Z;
Chris@42 302 T25 = T1Z + T24;
Chris@42 303 }
Chris@42 304 }
Chris@42 305 }
Chris@42 306 {
Chris@42 307 E T27, TP, T4O, T2f, T2c, TS, T4P, T2a, TX, T2i, TW, T4T, T2q, TY, T2j;
Chris@42 308 E T2k;
Chris@42 309 {
Chris@42 310 E TQ, TR, T28, T29;
Chris@42 311 {
Chris@42 312 E TN, TO, T2d, T2e;
Chris@42 313 TN = Cr[WS(csr, 5)];
Chris@42 314 T5t = T52 - T4Z;
Chris@42 315 T53 = T4Z + T52;
Chris@42 316 T3x = T2A + T2B;
Chris@42 317 T2C = T2A - T2B;
Chris@42 318 TO = Cr[WS(csr, 27)];
Chris@42 319 T2d = Ci[WS(csi, 5)];
Chris@42 320 T2e = Ci[WS(csi, 27)];
Chris@42 321 TQ = Cr[WS(csr, 21)];
Chris@42 322 T27 = TN - TO;
Chris@42 323 TP = TN + TO;
Chris@42 324 T4O = T2d - T2e;
Chris@42 325 T2f = T2d + T2e;
Chris@42 326 TR = Cr[WS(csr, 11)];
Chris@42 327 T28 = Ci[WS(csi, 21)];
Chris@42 328 T29 = Ci[WS(csi, 11)];
Chris@42 329 }
Chris@42 330 {
Chris@42 331 E TU, TV, T2o, T2p;
Chris@42 332 TU = Cr[WS(csr, 3)];
Chris@42 333 T2c = TQ - TR;
Chris@42 334 TS = TQ + TR;
Chris@42 335 T4P = T28 - T29;
Chris@42 336 T2a = T28 + T29;
Chris@42 337 TV = Cr[WS(csr, 29)];
Chris@42 338 T2o = Ci[WS(csi, 3)];
Chris@42 339 T2p = Ci[WS(csi, 29)];
Chris@42 340 TX = Cr[WS(csr, 13)];
Chris@42 341 T2i = TU - TV;
Chris@42 342 TW = TU + TV;
Chris@42 343 T4T = T2p - T2o;
Chris@42 344 T2q = T2o + T2p;
Chris@42 345 TY = Cr[WS(csr, 19)];
Chris@42 346 T2j = Ci[WS(csi, 13)];
Chris@42 347 T2k = Ci[WS(csi, 19)];
Chris@42 348 }
Chris@42 349 }
Chris@42 350 {
Chris@42 351 E T4N, T2n, T2l, T4Q, T2b, T2g, TT, TZ, T4U;
Chris@42 352 T4N = TP - TS;
Chris@42 353 TT = TP + TS;
Chris@42 354 T2n = TX - TY;
Chris@42 355 TZ = TX + TY;
Chris@42 356 T4U = T2j - T2k;
Chris@42 357 T2l = T2j + T2k;
Chris@42 358 {
Chris@42 359 E T5S, T10, T4S, T4V, T5R;
Chris@42 360 T5S = T4P + T4O;
Chris@42 361 T4Q = T4O - T4P;
Chris@42 362 T10 = TW + TZ;
Chris@42 363 T4S = TW - TZ;
Chris@42 364 T4V = T4T - T4U;
Chris@42 365 T5R = T4U + T4T;
Chris@42 366 T3A = T27 + T2a;
Chris@42 367 T2b = T27 - T2a;
Chris@42 368 T5V = TT - T10;
Chris@42 369 T11 = TT + T10;
Chris@42 370 T6g = T5S + T5R;
Chris@42 371 T5T = T5R - T5S;
Chris@42 372 T55 = T4V - T4S;
Chris@42 373 T4W = T4S + T4V;
Chris@42 374 T2g = T2c + T2f;
Chris@42 375 T3z = T2f - T2c;
Chris@42 376 }
Chris@42 377 {
Chris@42 378 E T3D, T3C, T2m, T2r;
Chris@42 379 T3D = T2i + T2l;
Chris@42 380 T2m = T2i - T2l;
Chris@42 381 T2r = T2n - T2q;
Chris@42 382 T3C = T2n + T2q;
Chris@42 383 T2E = FMA(KP414213562, T2b, T2g);
Chris@42 384 T2h = FNMS(KP414213562, T2g, T2b);
Chris@42 385 T2F = FNMS(KP414213562, T2m, T2r);
Chris@42 386 T2s = FMA(KP414213562, T2r, T2m);
Chris@42 387 T3L = FMA(KP414213562, T3C, T3D);
Chris@42 388 T3E = FNMS(KP414213562, T3D, T3C);
Chris@42 389 T54 = T4N + T4Q;
Chris@42 390 T4R = T4N - T4Q;
Chris@42 391 }
Chris@42 392 }
Chris@42 393 }
Chris@42 394 }
Chris@42 395 }
Chris@42 396 }
/* Output phase: no further Cr/Ci loads occur from here on. The blocks below
   combine the intermediates computed above and store into R0 and R1. */
Chris@42 397 {
Chris@42 398 E T3K, T3B, T5u, T5r, T5d, T5g;
Chris@42 399 {
Chris@42 400 E T6e, T6h, T6b, T5J, T5O, T5Z, T66, T69, T65, T67, T5U, T12, T6m, Th;
Chris@42 401 T6e = TM - T11;
Chris@42 402 T12 = TM + T11;
Chris@42 403 T6m = T6g + T6f;
Chris@42 404 T6h = T6f - T6g;
Chris@42 405 T6b = FNMS(KP2_000000000, Tg, T9);
Chris@42 406 Th = FMA(KP2_000000000, Tg, T9);
Chris@42 407 T3K = FMA(KP414213562, T3z, T3A);
Chris@42 408 T3B = FNMS(KP414213562, T3A, T3z);
Chris@42 409 {
Chris@42 410 E T63, T64, T6l, Tx;
Chris@42 411 T5J = FNMS(KP2_000000000, T5I, T5H);
Chris@42 412 T63 = FMA(KP2_000000000, T5I, T5H);
Chris@42 413 T64 = T5K + T5N;
Chris@42 414 T5O = T5K - T5N;
Chris@42 415 T5Z = T5V + T5Y;
Chris@42 416 T66 = T5Y - T5V;
Chris@42 417 T6l = FNMS(KP2_000000000, Tw, Th);
Chris@42 418 Tx = FMA(KP2_000000000, Tw, Th);
Chris@42 419 T69 = FMA(KP1_414213562, T64, T63);
Chris@42 420 T65 = FNMS(KP1_414213562, T64, T63);
Chris@42 421 R0[WS(rs, 8)] = FNMS(KP2_000000000, T6m, T6l);
Chris@42 422 R0[WS(rs, 24)] = FMA(KP2_000000000, T6m, T6l);
Chris@42 423 R0[0] = FMA(KP2_000000000, T12, Tx);
Chris@42 424 R0[WS(rs, 16)] = FNMS(KP2_000000000, T12, Tx);
Chris@42 425 T67 = T5Q - T5T;
Chris@42 426 T5U = T5Q + T5T;
Chris@42 427 }
Chris@42 428 {
Chris@42 429 E T6j, T6d, T6a, T68;
Chris@42 430 T6a = FMA(KP414213562, T66, T67);
Chris@42 431 T68 = FNMS(KP414213562, T67, T66);
Chris@42 432 T6j = FMA(KP2_000000000, T6c, T6b);
Chris@42 433 T6d = FNMS(KP2_000000000, T6c, T6b);
Chris@42 434 R0[WS(rs, 14)] = FNMS(KP1_847759065, T6a, T69);
Chris@42 435 R0[WS(rs, 30)] = FMA(KP1_847759065, T6a, T69);
Chris@42 436 R0[WS(rs, 22)] = FMA(KP1_847759065, T68, T65);
Chris@42 437 R0[WS(rs, 6)] = FNMS(KP1_847759065, T68, T65);
Chris@42 438 {
Chris@42 439 E T61, T5P, T6k, T6i;
Chris@42 440 T6k = T6e + T6h;
Chris@42 441 T6i = T6e - T6h;
Chris@42 442 T61 = FNMS(KP1_414213562, T5O, T5J);
Chris@42 443 T5P = FMA(KP1_414213562, T5O, T5J);
Chris@42 444 R0[WS(rs, 12)] = FNMS(KP1_414213562, T6k, T6j);
Chris@42 445 R0[WS(rs, 28)] = FMA(KP1_414213562, T6k, T6j);
Chris@42 446 R0[WS(rs, 4)] = FMA(KP1_414213562, T6i, T6d);
Chris@42 447 R0[WS(rs, 20)] = FNMS(KP1_414213562, T6i, T6d);
Chris@42 448 {
Chris@42 449 E T5b, T4v, T5f, T4Y, T5e, T57, T4G, T5c;
Chris@42 450 {
Chris@42 451 E T4X, T56, T62, T60;
Chris@42 452 T5u = T4W - T4R;
Chris@42 453 T4X = T4R + T4W;
Chris@42 454 T56 = T54 + T55;
Chris@42 455 T5r = T54 - T55;
Chris@42 456 T5b = FNMS(KP1_414213562, T4u, T4p);
Chris@42 457 T4v = FMA(KP1_414213562, T4u, T4p);
Chris@42 458 T62 = FMA(KP414213562, T5U, T5Z);
Chris@42 459 T60 = FNMS(KP414213562, T5Z, T5U);
Chris@42 460 T5f = FNMS(KP707106781, T4X, T4M);
Chris@42 461 T4Y = FMA(KP707106781, T4X, T4M);
Chris@42 462 T5e = FNMS(KP707106781, T56, T53);
Chris@42 463 T57 = FMA(KP707106781, T56, T53);
Chris@42 464 R0[WS(rs, 10)] = FNMS(KP1_847759065, T62, T61);
Chris@42 465 R0[WS(rs, 26)] = FMA(KP1_847759065, T62, T61);
Chris@42 466 R0[WS(rs, 2)] = FMA(KP1_847759065, T60, T5P);
Chris@42 467 R0[WS(rs, 18)] = FNMS(KP1_847759065, T60, T5P);
Chris@42 468 T4G = FNMS(KP414213562, T4F, T4A);
Chris@42 469 T5c = FMA(KP414213562, T4A, T4F);
Chris@42 470 }
Chris@42 471 {
Chris@42 472 E T5a, T59, T5h, T5i, T58, T4H;
Chris@42 473 T5a = FMA(KP198912367, T4Y, T57);
Chris@42 474 T58 = FNMS(KP198912367, T57, T4Y);
Chris@42 475 T59 = FNMS(KP1_847759065, T4G, T4v);
Chris@42 476 T4H = FMA(KP1_847759065, T4G, T4v);
Chris@42 477 T5h = FMA(KP1_847759065, T5c, T5b);
Chris@42 478 T5d = FNMS(KP1_847759065, T5c, T5b);
Chris@42 479 T5i = FMA(KP668178637, T5e, T5f);
Chris@42 480 T5g = FNMS(KP668178637, T5f, T5e);
Chris@42 481 R0[WS(rs, 1)] = FMA(KP1_961570560, T58, T4H);
Chris@42 482 R0[WS(rs, 17)] = FNMS(KP1_961570560, T58, T4H);
Chris@42 483 R0[WS(rs, 29)] = FMA(KP1_662939224, T5i, T5h);
Chris@42 484 R0[WS(rs, 13)] = FNMS(KP1_662939224, T5i, T5h);
Chris@42 485 R0[WS(rs, 25)] = FMA(KP1_961570560, T5a, T59);
Chris@42 486 R0[WS(rs, 9)] = FNMS(KP1_961570560, T5a, T59);
Chris@42 487 }
Chris@42 488 }
Chris@42 489 }
Chris@42 490 }
Chris@42 491 }
Chris@42 492 {
Chris@42 493 E T43, T42, T46, T4a, T49, T3V, T3G, T47, T3P, T3v, T3X, T3T, T3U, T3N, T5B;
Chris@42 494 E T5E;
Chris@42 495 {
Chris@42 496 E T5s, T5D, T5z, T5l, T5C, T5v, T5o, T5A;
Chris@42 497 R0[WS(rs, 21)] = FMA(KP1_662939224, T5g, T5d);
Chris@42 498 R0[WS(rs, 5)] = FNMS(KP1_662939224, T5g, T5d);
Chris@42 499 T5s = FNMS(KP707106781, T5r, T5q);
Chris@42 500 T5D = FMA(KP707106781, T5r, T5q);
Chris@42 501 T5z = FMA(KP1_414213562, T5k, T5j);
Chris@42 502 T5l = FNMS(KP1_414213562, T5k, T5j);
Chris@42 503 T5C = FMA(KP707106781, T5u, T5t);
Chris@42 504 T5v = FNMS(KP707106781, T5u, T5t);
Chris@42 505 T5o = FNMS(KP414213562, T5n, T5m);
Chris@42 506 T5A = FMA(KP414213562, T5m, T5n);
Chris@42 507 {
Chris@42 508 E T5y, T5x, T5F, T5G, T5w, T5p;
Chris@42 509 T5y = FMA(KP668178637, T5s, T5v);
Chris@42 510 T5w = FNMS(KP668178637, T5v, T5s);
Chris@42 511 T5x = FMA(KP1_847759065, T5o, T5l);
Chris@42 512 T5p = FNMS(KP1_847759065, T5o, T5l);
Chris@42 513 T5F = FMA(KP1_847759065, T5A, T5z);
Chris@42 514 T5B = FNMS(KP1_847759065, T5A, T5z);
Chris@42 515 T5G = FMA(KP198912367, T5C, T5D);
Chris@42 516 T5E = FNMS(KP198912367, T5D, T5C);
Chris@42 517 R0[WS(rs, 3)] = FMA(KP1_662939224, T5w, T5p);
Chris@42 518 R0[WS(rs, 19)] = FNMS(KP1_662939224, T5w, T5p);
Chris@42 519 R0[WS(rs, 31)] = FMA(KP1_961570560, T5G, T5F);
Chris@42 520 R0[WS(rs, 15)] = FNMS(KP1_961570560, T5G, T5F);
Chris@42 521 R0[WS(rs, 27)] = FMA(KP1_662939224, T5y, T5x);
Chris@42 522 R0[WS(rs, 11)] = FNMS(KP1_662939224, T5y, T5x);
Chris@42 523 }
Chris@42 524 }
Chris@42 525 {
Chris@42 526 E T3R, T3n, T3J, T3S, T3u, T3M;
Chris@42 527 T3R = FMA(KP1_847759065, T3m, T3j);
Chris@42 528 T3n = FNMS(KP1_847759065, T3m, T3j);
Chris@42 529 R0[WS(rs, 23)] = FMA(KP1_961570560, T5E, T5B);
Chris@42 530 R0[WS(rs, 7)] = FNMS(KP1_961570560, T5E, T5B);
Chris@42 531 {
Chris@42 532 E T3q, T3t, T3y, T3F;
Chris@42 533 T43 = FMA(KP707106781, T3p, T3o);
Chris@42 534 T3q = FNMS(KP707106781, T3p, T3o);
Chris@42 535 T3t = FNMS(KP707106781, T3s, T3r);
Chris@42 536 T42 = FMA(KP707106781, T3s, T3r);
Chris@42 537 T46 = FMA(KP707106781, T3x, T3w);
Chris@42 538 T3y = FNMS(KP707106781, T3x, T3w);
Chris@42 539 T3F = T3B + T3E;
Chris@42 540 T4a = T3B - T3E;
Chris@42 541 T49 = FMA(KP707106781, T3I, T3H);
Chris@42 542 T3J = FNMS(KP707106781, T3I, T3H);
Chris@42 543 T3S = FMA(KP668178637, T3q, T3t);
Chris@42 544 T3u = FNMS(KP668178637, T3t, T3q);
Chris@42 545 T3V = FMA(KP923879532, T3F, T3y);
Chris@42 546 T3G = FNMS(KP923879532, T3F, T3y);
Chris@42 547 T3M = T3K - T3L;
Chris@42 548 T47 = T3K + T3L;
Chris@42 549 }
Chris@42 550 T3P = FNMS(KP1_662939224, T3u, T3n);
Chris@42 551 T3v = FMA(KP1_662939224, T3u, T3n);
Chris@42 552 T3X = FMA(KP1_662939224, T3S, T3R);
Chris@42 553 T3T = FNMS(KP1_662939224, T3S, T3R);
Chris@42 554 T3U = FNMS(KP923879532, T3M, T3J);
Chris@42 555 T3N = FMA(KP923879532, T3M, T3J);
Chris@42 556 }
Chris@42 557 {
Chris@42 558 E T2X, T2W, T30, T34, T33, T2P, T2u, T31, T2J, T1P, T2R, T2N, T2O, T2H;
Chris@42 559 {
Chris@42 560 E T2L, T1n, T2D, T2M, T1O, T2G;
Chris@42 561 T2L = FNMS(KP1_847759065, T1m, T1b);
Chris@42 562 T1n = FMA(KP1_847759065, T1m, T1b);
Chris@42 563 {
Chris@42 564 E T3W, T3Y, T3Q, T3O;
Chris@42 565 T3W = FNMS(KP534511135, T3V, T3U);
Chris@42 566 T3Y = FMA(KP534511135, T3U, T3V);
Chris@42 567 T3Q = FMA(KP303346683, T3G, T3N);
Chris@42 568 T3O = FNMS(KP303346683, T3N, T3G);
Chris@42 569 R1[WS(rs, 21)] = FMA(KP1_763842528, T3W, T3T);
Chris@42 570 R1[WS(rs, 5)] = FNMS(KP1_763842528, T3W, T3T);
Chris@42 571 R1[WS(rs, 29)] = FMA(KP1_763842528, T3Y, T3X);
Chris@42 572 R1[WS(rs, 13)] = FNMS(KP1_763842528, T3Y, T3X);
Chris@42 573 R1[WS(rs, 25)] = FMA(KP1_913880671, T3Q, T3P);
Chris@42 574 R1[WS(rs, 9)] = FNMS(KP1_913880671, T3Q, T3P);
Chris@42 575 R1[WS(rs, 1)] = FMA(KP1_913880671, T3O, T3v);
Chris@42 576 R1[WS(rs, 17)] = FNMS(KP1_913880671, T3O, T3v);
Chris@42 577 }
Chris@42 578 {
Chris@42 579 E T1E, T1N, T26, T2t;
Chris@42 580 T2X = FNMS(KP707106781, T1D, T1s);
Chris@42 581 T1E = FMA(KP707106781, T1D, T1s);
Chris@42 582 T1N = FMA(KP707106781, T1M, T1J);
Chris@42 583 T2W = FNMS(KP707106781, T1M, T1J);
Chris@42 584 T30 = FNMS(KP707106781, T25, T1U);
Chris@42 585 T26 = FMA(KP707106781, T25, T1U);
Chris@42 586 T2t = T2h + T2s;
Chris@42 587 T34 = T2s - T2h;
Chris@42 588 T33 = FNMS(KP707106781, T2C, T2z);
Chris@42 589 T2D = FMA(KP707106781, T2C, T2z);
Chris@42 590 T2M = FMA(KP198912367, T1E, T1N);
Chris@42 591 T1O = FNMS(KP198912367, T1N, T1E);
Chris@42 592 T2P = FNMS(KP923879532, T2t, T26);
Chris@42 593 T2u = FMA(KP923879532, T2t, T26);
Chris@42 594 T2G = T2E + T2F;
Chris@42 595 T31 = T2E - T2F;
Chris@42 596 }
Chris@42 597 T2J = FNMS(KP1_961570560, T1O, T1n);
Chris@42 598 T1P = FMA(KP1_961570560, T1O, T1n);
Chris@42 599 T2R = FMA(KP1_961570560, T2M, T2L);
Chris@42 600 T2N = FNMS(KP1_961570560, T2M, T2L);
Chris@42 601 T2O = FNMS(KP923879532, T2G, T2D);
Chris@42 602 T2H = FMA(KP923879532, T2G, T2D);
Chris@42 603 }
Chris@42 604 {
Chris@42 605 E T4j, T48, T4d, T45, T4l, T4h, T4i, T4b;
Chris@42 606 {
Chris@42 607 E T4f, T41, T4g, T44;
Chris@42 608 T4f = FMA(KP1_847759065, T40, T3Z);
Chris@42 609 T41 = FNMS(KP1_847759065, T40, T3Z);
Chris@42 610 {
Chris@42 611 E T2Q, T2S, T2K, T2I;
Chris@42 612 T2Q = FNMS(KP820678790, T2P, T2O);
Chris@42 613 T2S = FMA(KP820678790, T2O, T2P);
Chris@42 614 T2K = FMA(KP098491403, T2u, T2H);
Chris@42 615 T2I = FNMS(KP098491403, T2H, T2u);
Chris@42 616 R1[WS(rs, 20)] = FMA(KP1_546020906, T2Q, T2N);
Chris@42 617 R1[WS(rs, 4)] = FNMS(KP1_546020906, T2Q, T2N);
Chris@42 618 R1[WS(rs, 28)] = FMA(KP1_546020906, T2S, T2R);
Chris@42 619 R1[WS(rs, 12)] = FNMS(KP1_546020906, T2S, T2R);
Chris@42 620 R1[WS(rs, 24)] = FMA(KP1_990369453, T2K, T2J);
Chris@42 621 R1[WS(rs, 8)] = FNMS(KP1_990369453, T2K, T2J);
Chris@42 622 R1[0] = FMA(KP1_990369453, T2I, T1P);
Chris@42 623 R1[WS(rs, 16)] = FNMS(KP1_990369453, T2I, T1P);
Chris@42 624 }
Chris@42 625 T4g = FMA(KP198912367, T42, T43);
Chris@42 626 T44 = FNMS(KP198912367, T43, T42);
Chris@42 627 T4j = FMA(KP923879532, T47, T46);
Chris@42 628 T48 = FNMS(KP923879532, T47, T46);
Chris@42 629 T4d = FMA(KP1_961570560, T44, T41);
Chris@42 630 T45 = FNMS(KP1_961570560, T44, T41);
Chris@42 631 T4l = FMA(KP1_961570560, T4g, T4f);
Chris@42 632 T4h = FNMS(KP1_961570560, T4g, T4f);
Chris@42 633 T4i = FMA(KP923879532, T4a, T49);
Chris@42 634 T4b = FNMS(KP923879532, T4a, T49);
Chris@42 635 }
Chris@42 636 {
Chris@42 637 E T39, T2V, T3a, T2Y;
Chris@42 638 T39 = FMA(KP1_847759065, T2U, T2T);
Chris@42 639 T2V = FNMS(KP1_847759065, T2U, T2T);
Chris@42 640 {
Chris@42 641 E T4k, T4m, T4e, T4c;
Chris@42 642 T4k = FNMS(KP098491403, T4j, T4i);
Chris@42 643 T4m = FMA(KP098491403, T4i, T4j);
Chris@42 644 T4e = FMA(KP820678790, T48, T4b);
Chris@42 645 T4c = FNMS(KP820678790, T4b, T48);
Chris@42 646 R1[WS(rs, 23)] = FMA(KP1_990369453, T4k, T4h);
Chris@42 647 R1[WS(rs, 7)] = FNMS(KP1_990369453, T4k, T4h);
Chris@42 648 R1[WS(rs, 31)] = FMA(KP1_990369453, T4m, T4l);
Chris@42 649 R1[WS(rs, 15)] = FNMS(KP1_990369453, T4m, T4l);
Chris@42 650 R1[WS(rs, 27)] = FMA(KP1_546020906, T4e, T4d);
Chris@42 651 R1[WS(rs, 11)] = FNMS(KP1_546020906, T4e, T4d);
Chris@42 652 R1[WS(rs, 3)] = FMA(KP1_546020906, T4c, T45);
Chris@42 653 R1[WS(rs, 19)] = FNMS(KP1_546020906, T4c, T45);
Chris@42 654 }
Chris@42 655 T3a = FMA(KP668178637, T2W, T2X);
Chris@42 656 T2Y = FNMS(KP668178637, T2X, T2W);
Chris@42 657 T3d = FMA(KP923879532, T31, T30);
Chris@42 658 T32 = FNMS(KP923879532, T31, T30);
Chris@42 659 T37 = FMA(KP1_662939224, T2Y, T2V);
Chris@42 660 T2Z = FNMS(KP1_662939224, T2Y, T2V);
Chris@42 661 T3f = FMA(KP1_662939224, T3a, T39);
Chris@42 662 T3b = FNMS(KP1_662939224, T3a, T39);
Chris@42 663 T3c = FMA(KP923879532, T34, T33);
Chris@42 664 T35 = FNMS(KP923879532, T34, T33);
Chris@42 665 }
Chris@42 666 }
Chris@42 667 }
Chris@42 668 }
Chris@42 669 }
Chris@42 670 }
/* Last eight stores: R1 at indices 2, 6, 10, 14, 18, 22, 26 and 30, built
   from the eight temporaries declared at the top of the loop body. */
Chris@42 671 {
Chris@42 672 E T3g, T3e, T36, T38;
Chris@42 673 T3g = FMA(KP303346683, T3c, T3d);
Chris@42 674 T3e = FNMS(KP303346683, T3d, T3c);
Chris@42 675 T36 = FNMS(KP534511135, T35, T32);
Chris@42 676 T38 = FMA(KP534511135, T32, T35);
Chris@42 677 R1[WS(rs, 22)] = FMA(KP1_913880671, T3e, T3b);
Chris@42 678 R1[WS(rs, 6)] = FNMS(KP1_913880671, T3e, T3b);
Chris@42 679 R1[WS(rs, 30)] = FMA(KP1_913880671, T3g, T3f);
Chris@42 680 R1[WS(rs, 14)] = FNMS(KP1_913880671, T3g, T3f);
Chris@42 681 R1[WS(rs, 26)] = FMA(KP1_763842528, T38, T37);
Chris@42 682 R1[WS(rs, 10)] = FNMS(KP1_763842528, T38, T37);
Chris@42 683 R1[WS(rs, 2)] = FMA(KP1_763842528, T36, T2Z);
Chris@42 684 R1[WS(rs, 18)] = FNMS(KP1_763842528, T36, T2Z);
Chris@42 685 }
Chris@42 686 }
Chris@42 687 }
Chris@42 688 }
Chris@42 689
/*
 * Descriptor passed to the planner together with r2cb_64 (FMA variant).
 * Fields as initialized here: 64 (matches the generator's "-n 64"), the
 * codelet name string, and a four-number tuple whose 178 / 0 / 216 entries
 * match the "178 additions, 0 multiplications, 216 fused multiply/add"
 * counts quoted in the header comment above the function.
 * NOTE(review): the meaning of the fourth tuple entry and of GENUS is not
 * visible in this file -- presumably defined by kr2c_desc / r2cb.h; confirm.
 */
Chris@42 690 static const kr2c_desc desc = { 64, "r2cb_64", {178, 0, 216, 0}, &GENUS };
Chris@42 691
/*
 * Registration entry point for the FMA variant: hands the r2cb_64 function
 * and its descriptor to the planner p through X(kr2c_register).
 * NOTE(review): X() is a macro defined outside this file, presumably the
 * library's symbol-prefixing macro -- confirm in the included headers.
 */
Chris@42 692 void X(codelet_r2cb_64) (planner *p) {
Chris@42 693 X(kr2c_register) (p, r2cb_64, &desc);
Chris@42 694 }
Chris@42 695
Chris@42 696 #else /* HAVE_FMA */
Chris@42 697
Chris@42 698 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cb_64 -include r2cb.h */
Chris@42 699
Chris@42 700 /*
Chris@42 701 * This function contains 394 FP additions, 134 FP multiplications,
Chris@42 702 * (or, 342 additions, 82 multiplications, 52 fused multiply/add),
Chris@42 703 * 110 stack variables, 19 constants, and 128 memory accesses
Chris@42 704 */
Chris@42 705 #include "r2cb.h"
Chris@42 706
Chris@42 707 static void r2cb_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 708 {
Chris@42 709 DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
Chris@42 710 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@42 711 DK(KP196034280, +0.196034280659121203988391127777283691722273346);
Chris@42 712 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@42 713 DK(KP942793473, +0.942793473651995297112775251810508755314920638);
Chris@42 714 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@42 715 DK(KP580569354, +0.580569354508924735272384751634790549382952557);
Chris@42 716 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@42 717 DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
Chris@42 718 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@42 719 DK(KP390180644, +0.390180644032256535696569736954044481855383236);
Chris@42 720 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@42 721 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 722 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 723 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 724 DK(KP765366864, +0.765366864730179543456919968060797733522689125);
Chris@42 725 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@42 726 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@42 727 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 728 {
Chris@42 729 INT i;
Chris@42 730 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
Chris@42 731 E Ta, T2S, T18, T2u, T3F, T4V, T5l, T61, Th, T2T, T1h, T2v, T3M, T4W, T5o;
Chris@42 732 E T62, T3Q, T5q, T5u, T44, Tp, Tw, T2V, T2W, T2X, T2Y, T3X, T5t, T1r, T2x;
Chris@42 733 E T41, T5r, T1A, T2y, T4a, T5y, T5N, T4H, TN, T31, T4E, T5z, T39, T3q, T1L;
Chris@42 734 E T2B, T4h, T5M, T2h, T2F, T12, T36, T5D, T5J, T5G, T5K, T1U, T26, T23, T27;
Chris@42 735 E T4p, T4z, T4w, T4A, T34, T3r;
Chris@42 736 {
Chris@42 737 E T5, T3A, T3, T3y, T9, T3C, T17, T3D, T6, T14;
Chris@42 738 {
Chris@42 739 E T4, T3z, T1, T2;
Chris@42 740 T4 = Cr[WS(csr, 16)];
Chris@42 741 T5 = KP2_000000000 * T4;
Chris@42 742 T3z = Ci[WS(csi, 16)];
Chris@42 743 T3A = KP2_000000000 * T3z;
Chris@42 744 T1 = Cr[0];
Chris@42 745 T2 = Cr[WS(csr, 32)];
Chris@42 746 T3 = T1 + T2;
Chris@42 747 T3y = T1 - T2;
Chris@42 748 {
Chris@42 749 E T7, T8, T15, T16;
Chris@42 750 T7 = Cr[WS(csr, 8)];
Chris@42 751 T8 = Cr[WS(csr, 24)];
Chris@42 752 T9 = KP2_000000000 * (T7 + T8);
Chris@42 753 T3C = T7 - T8;
Chris@42 754 T15 = Ci[WS(csi, 8)];
Chris@42 755 T16 = Ci[WS(csi, 24)];
Chris@42 756 T17 = KP2_000000000 * (T15 - T16);
Chris@42 757 T3D = T15 + T16;
Chris@42 758 }
Chris@42 759 }
Chris@42 760 T6 = T3 + T5;
Chris@42 761 Ta = T6 + T9;
Chris@42 762 T2S = T6 - T9;
Chris@42 763 T14 = T3 - T5;
Chris@42 764 T18 = T14 - T17;
Chris@42 765 T2u = T14 + T17;
Chris@42 766 {
Chris@42 767 E T3B, T3E, T5j, T5k;
Chris@42 768 T3B = T3y - T3A;
Chris@42 769 T3E = KP1_414213562 * (T3C - T3D);
Chris@42 770 T3F = T3B + T3E;
Chris@42 771 T4V = T3B - T3E;
Chris@42 772 T5j = T3y + T3A;
Chris@42 773 T5k = KP1_414213562 * (T3C + T3D);
Chris@42 774 T5l = T5j - T5k;
Chris@42 775 T61 = T5j + T5k;
Chris@42 776 }
Chris@42 777 }
Chris@42 778 {
Chris@42 779 E Td, T3G, T1c, T3K, Tg, T3J, T1f, T3H, T19, T1g;
Chris@42 780 {
Chris@42 781 E Tb, Tc, T1a, T1b;
Chris@42 782 Tb = Cr[WS(csr, 4)];
Chris@42 783 Tc = Cr[WS(csr, 28)];
Chris@42 784 Td = Tb + Tc;
Chris@42 785 T3G = Tb - Tc;
Chris@42 786 T1a = Ci[WS(csi, 4)];
Chris@42 787 T1b = Ci[WS(csi, 28)];
Chris@42 788 T1c = T1a - T1b;
Chris@42 789 T3K = T1a + T1b;
Chris@42 790 }
Chris@42 791 {
Chris@42 792 E Te, Tf, T1d, T1e;
Chris@42 793 Te = Cr[WS(csr, 20)];
Chris@42 794 Tf = Cr[WS(csr, 12)];
Chris@42 795 Tg = Te + Tf;
Chris@42 796 T3J = Te - Tf;
Chris@42 797 T1d = Ci[WS(csi, 20)];
Chris@42 798 T1e = Ci[WS(csi, 12)];
Chris@42 799 T1f = T1d - T1e;
Chris@42 800 T3H = T1d + T1e;
Chris@42 801 }
Chris@42 802 Th = KP2_000000000 * (Td + Tg);
Chris@42 803 T2T = KP2_000000000 * (T1f + T1c);
Chris@42 804 T19 = Td - Tg;
Chris@42 805 T1g = T1c - T1f;
Chris@42 806 T1h = KP1_414213562 * (T19 - T1g);
Chris@42 807 T2v = KP1_414213562 * (T19 + T1g);
Chris@42 808 {
Chris@42 809 E T3I, T3L, T5m, T5n;
Chris@42 810 T3I = T3G - T3H;
Chris@42 811 T3L = T3J + T3K;
Chris@42 812 T3M = FNMS(KP765366864, T3L, KP1_847759065 * T3I);
Chris@42 813 T4W = FMA(KP765366864, T3I, KP1_847759065 * T3L);
Chris@42 814 T5m = T3G + T3H;
Chris@42 815 T5n = T3K - T3J;
Chris@42 816 T5o = FNMS(KP1_847759065, T5n, KP765366864 * T5m);
Chris@42 817 T62 = FMA(KP1_847759065, T5m, KP765366864 * T5n);
Chris@42 818 }
Chris@42 819 }
Chris@42 820 {
Chris@42 821 E Tl, T3O, T1v, T43, To, T42, T1y, T3P, Ts, T3R, T1p, T3S, Tv, T3U, T1m;
Chris@42 822 E T3V;
Chris@42 823 {
Chris@42 824 E Tj, Tk, T1t, T1u;
Chris@42 825 Tj = Cr[WS(csr, 2)];
Chris@42 826 Tk = Cr[WS(csr, 30)];
Chris@42 827 Tl = Tj + Tk;
Chris@42 828 T3O = Tj - Tk;
Chris@42 829 T1t = Ci[WS(csi, 2)];
Chris@42 830 T1u = Ci[WS(csi, 30)];
Chris@42 831 T1v = T1t - T1u;
Chris@42 832 T43 = T1t + T1u;
Chris@42 833 }
Chris@42 834 {
Chris@42 835 E Tm, Tn, T1w, T1x;
Chris@42 836 Tm = Cr[WS(csr, 18)];
Chris@42 837 Tn = Cr[WS(csr, 14)];
Chris@42 838 To = Tm + Tn;
Chris@42 839 T42 = Tm - Tn;
Chris@42 840 T1w = Ci[WS(csi, 18)];
Chris@42 841 T1x = Ci[WS(csi, 14)];
Chris@42 842 T1y = T1w - T1x;
Chris@42 843 T3P = T1w + T1x;
Chris@42 844 }
Chris@42 845 {
Chris@42 846 E Tq, Tr, T1n, T1o;
Chris@42 847 Tq = Cr[WS(csr, 10)];
Chris@42 848 Tr = Cr[WS(csr, 22)];
Chris@42 849 Ts = Tq + Tr;
Chris@42 850 T3R = Tq - Tr;
Chris@42 851 T1n = Ci[WS(csi, 10)];
Chris@42 852 T1o = Ci[WS(csi, 22)];
Chris@42 853 T1p = T1n - T1o;
Chris@42 854 T3S = T1n + T1o;
Chris@42 855 }
Chris@42 856 {
Chris@42 857 E Tt, Tu, T1k, T1l;
Chris@42 858 Tt = Cr[WS(csr, 6)];
Chris@42 859 Tu = Cr[WS(csr, 26)];
Chris@42 860 Tv = Tt + Tu;
Chris@42 861 T3U = Tt - Tu;
Chris@42 862 T1k = Ci[WS(csi, 26)];
Chris@42 863 T1l = Ci[WS(csi, 6)];
Chris@42 864 T1m = T1k - T1l;
Chris@42 865 T3V = T1l + T1k;
Chris@42 866 }
Chris@42 867 T3Q = T3O - T3P;
Chris@42 868 T5q = T3O + T3P;
Chris@42 869 T5u = T43 - T42;
Chris@42 870 T44 = T42 + T43;
Chris@42 871 Tp = Tl + To;
Chris@42 872 Tw = Ts + Tv;
Chris@42 873 T2V = Tp - Tw;
Chris@42 874 {
Chris@42 875 E T3T, T3W, T1j, T1q;
Chris@42 876 T2W = T1y + T1v;
Chris@42 877 T2X = T1p + T1m;
Chris@42 878 T2Y = T2W - T2X;
Chris@42 879 T3T = T3R - T3S;
Chris@42 880 T3W = T3U - T3V;
Chris@42 881 T3X = KP707106781 * (T3T + T3W);
Chris@42 882 T5t = KP707106781 * (T3T - T3W);
Chris@42 883 T1j = Tl - To;
Chris@42 884 T1q = T1m - T1p;
Chris@42 885 T1r = T1j + T1q;
Chris@42 886 T2x = T1j - T1q;
Chris@42 887 {
Chris@42 888 E T3Z, T40, T1s, T1z;
Chris@42 889 T3Z = T3R + T3S;
Chris@42 890 T40 = T3U + T3V;
Chris@42 891 T41 = KP707106781 * (T3Z - T40);
Chris@42 892 T5r = KP707106781 * (T3Z + T40);
Chris@42 893 T1s = Ts - Tv;
Chris@42 894 T1z = T1v - T1y;
Chris@42 895 T1A = T1s + T1z;
Chris@42 896 T2y = T1z - T1s;
Chris@42 897 }
Chris@42 898 }
Chris@42 899 }
Chris@42 900 {
Chris@42 901 E TB, T48, T2c, T4G, TE, T4F, T2f, T49, TI, T4b, T1J, T4c, TL, T4e, T1G;
Chris@42 902 E T4f;
Chris@42 903 {
Chris@42 904 E Tz, TA, T2a, T2b;
Chris@42 905 Tz = Cr[WS(csr, 1)];
Chris@42 906 TA = Cr[WS(csr, 31)];
Chris@42 907 TB = Tz + TA;
Chris@42 908 T48 = Tz - TA;
Chris@42 909 T2a = Ci[WS(csi, 1)];
Chris@42 910 T2b = Ci[WS(csi, 31)];
Chris@42 911 T2c = T2a - T2b;
Chris@42 912 T4G = T2a + T2b;
Chris@42 913 }
Chris@42 914 {
Chris@42 915 E TC, TD, T2d, T2e;
Chris@42 916 TC = Cr[WS(csr, 17)];
Chris@42 917 TD = Cr[WS(csr, 15)];
Chris@42 918 TE = TC + TD;
Chris@42 919 T4F = TC - TD;
Chris@42 920 T2d = Ci[WS(csi, 17)];
Chris@42 921 T2e = Ci[WS(csi, 15)];
Chris@42 922 T2f = T2d - T2e;
Chris@42 923 T49 = T2d + T2e;
Chris@42 924 }
Chris@42 925 {
Chris@42 926 E TG, TH, T1H, T1I;
Chris@42 927 TG = Cr[WS(csr, 9)];
Chris@42 928 TH = Cr[WS(csr, 23)];
Chris@42 929 TI = TG + TH;
Chris@42 930 T4b = TG - TH;
Chris@42 931 T1H = Ci[WS(csi, 9)];
Chris@42 932 T1I = Ci[WS(csi, 23)];
Chris@42 933 T1J = T1H - T1I;
Chris@42 934 T4c = T1H + T1I;
Chris@42 935 }
Chris@42 936 {
Chris@42 937 E TJ, TK, T1E, T1F;
Chris@42 938 TJ = Cr[WS(csr, 7)];
Chris@42 939 TK = Cr[WS(csr, 25)];
Chris@42 940 TL = TJ + TK;
Chris@42 941 T4e = TJ - TK;
Chris@42 942 T1E = Ci[WS(csi, 25)];
Chris@42 943 T1F = Ci[WS(csi, 7)];
Chris@42 944 T1G = T1E - T1F;
Chris@42 945 T4f = T1F + T1E;
Chris@42 946 }
Chris@42 947 {
Chris@42 948 E TF, TM, T1D, T1K;
Chris@42 949 T4a = T48 - T49;
Chris@42 950 T5y = T48 + T49;
Chris@42 951 T5N = T4G - T4F;
Chris@42 952 T4H = T4F + T4G;
Chris@42 953 TF = TB + TE;
Chris@42 954 TM = TI + TL;
Chris@42 955 TN = TF + TM;
Chris@42 956 T31 = TF - TM;
Chris@42 957 {
Chris@42 958 E T4C, T4D, T37, T38;
Chris@42 959 T4C = T4b + T4c;
Chris@42 960 T4D = T4e + T4f;
Chris@42 961 T4E = KP707106781 * (T4C - T4D);
Chris@42 962 T5z = KP707106781 * (T4C + T4D);
Chris@42 963 T37 = T2f + T2c;
Chris@42 964 T38 = T1J + T1G;
Chris@42 965 T39 = T37 - T38;
Chris@42 966 T3q = T38 + T37;
Chris@42 967 }
Chris@42 968 T1D = TB - TE;
Chris@42 969 T1K = T1G - T1J;
Chris@42 970 T1L = T1D + T1K;
Chris@42 971 T2B = T1D - T1K;
Chris@42 972 {
Chris@42 973 E T4d, T4g, T29, T2g;
Chris@42 974 T4d = T4b - T4c;
Chris@42 975 T4g = T4e - T4f;
Chris@42 976 T4h = KP707106781 * (T4d + T4g);
Chris@42 977 T5M = KP707106781 * (T4d - T4g);
Chris@42 978 T29 = TI - TL;
Chris@42 979 T2g = T2c - T2f;
Chris@42 980 T2h = T29 + T2g;
Chris@42 981 T2F = T2g - T29;
Chris@42 982 }
Chris@42 983 }
Chris@42 984 }
Chris@42 985 {
Chris@42 986 E TQ, T4j, T1P, T4n, TT, T4m, T1S, T4k, TX, T4q, T1Y, T4u, T10, T4t, T21;
Chris@42 987 E T4r;
Chris@42 988 {
Chris@42 989 E TO, TP, T1N, T1O;
Chris@42 990 TO = Cr[WS(csr, 5)];
Chris@42 991 TP = Cr[WS(csr, 27)];
Chris@42 992 TQ = TO + TP;
Chris@42 993 T4j = TO - TP;
Chris@42 994 T1N = Ci[WS(csi, 5)];
Chris@42 995 T1O = Ci[WS(csi, 27)];
Chris@42 996 T1P = T1N - T1O;
Chris@42 997 T4n = T1N + T1O;
Chris@42 998 }
Chris@42 999 {
Chris@42 1000 E TR, TS, T1Q, T1R;
Chris@42 1001 TR = Cr[WS(csr, 21)];
Chris@42 1002 TS = Cr[WS(csr, 11)];
Chris@42 1003 TT = TR + TS;
Chris@42 1004 T4m = TR - TS;
Chris@42 1005 T1Q = Ci[WS(csi, 21)];
Chris@42 1006 T1R = Ci[WS(csi, 11)];
Chris@42 1007 T1S = T1Q - T1R;
Chris@42 1008 T4k = T1Q + T1R;
Chris@42 1009 }
Chris@42 1010 {
Chris@42 1011 E TV, TW, T1W, T1X;
Chris@42 1012 TV = Cr[WS(csr, 3)];
Chris@42 1013 TW = Cr[WS(csr, 29)];
Chris@42 1014 TX = TV + TW;
Chris@42 1015 T4q = TV - TW;
Chris@42 1016 T1W = Ci[WS(csi, 29)];
Chris@42 1017 T1X = Ci[WS(csi, 3)];
Chris@42 1018 T1Y = T1W - T1X;
Chris@42 1019 T4u = T1X + T1W;
Chris@42 1020 }
Chris@42 1021 {
Chris@42 1022 E TY, TZ, T1Z, T20;
Chris@42 1023 TY = Cr[WS(csr, 13)];
Chris@42 1024 TZ = Cr[WS(csr, 19)];
Chris@42 1025 T10 = TY + TZ;
Chris@42 1026 T4t = TY - TZ;
Chris@42 1027 T1Z = Ci[WS(csi, 13)];
Chris@42 1028 T20 = Ci[WS(csi, 19)];
Chris@42 1029 T21 = T1Z - T20;
Chris@42 1030 T4r = T1Z + T20;
Chris@42 1031 }
Chris@42 1032 {
Chris@42 1033 E TU, T11, T5B, T5C;
Chris@42 1034 TU = TQ + TT;
Chris@42 1035 T11 = TX + T10;
Chris@42 1036 T12 = TU + T11;
Chris@42 1037 T36 = TU - T11;
Chris@42 1038 T5B = T4j + T4k;
Chris@42 1039 T5C = T4n - T4m;
Chris@42 1040 T5D = FNMS(KP923879532, T5C, KP382683432 * T5B);
Chris@42 1041 T5J = FMA(KP923879532, T5B, KP382683432 * T5C);
Chris@42 1042 }
Chris@42 1043 {
Chris@42 1044 E T5E, T5F, T1M, T1T;
Chris@42 1045 T5E = T4q + T4r;
Chris@42 1046 T5F = T4t + T4u;
Chris@42 1047 T5G = FNMS(KP923879532, T5F, KP382683432 * T5E);
Chris@42 1048 T5K = FMA(KP923879532, T5E, KP382683432 * T5F);
Chris@42 1049 T1M = TQ - TT;
Chris@42 1050 T1T = T1P - T1S;
Chris@42 1051 T1U = T1M - T1T;
Chris@42 1052 T26 = T1M + T1T;
Chris@42 1053 }
Chris@42 1054 {
Chris@42 1055 E T1V, T22, T4l, T4o;
Chris@42 1056 T1V = TX - T10;
Chris@42 1057 T22 = T1Y - T21;
Chris@42 1058 T23 = T1V + T22;
Chris@42 1059 T27 = T22 - T1V;
Chris@42 1060 T4l = T4j - T4k;
Chris@42 1061 T4o = T4m + T4n;
Chris@42 1062 T4p = FNMS(KP382683432, T4o, KP923879532 * T4l);
Chris@42 1063 T4z = FMA(KP382683432, T4l, KP923879532 * T4o);
Chris@42 1064 }
Chris@42 1065 {
Chris@42 1066 E T4s, T4v, T32, T33;
Chris@42 1067 T4s = T4q - T4r;
Chris@42 1068 T4v = T4t - T4u;
Chris@42 1069 T4w = FMA(KP923879532, T4s, KP382683432 * T4v);
Chris@42 1070 T4A = FNMS(KP382683432, T4s, KP923879532 * T4v);
Chris@42 1071 T32 = T21 + T1Y;
Chris@42 1072 T33 = T1S + T1P;
Chris@42 1073 T34 = T32 - T33;
Chris@42 1074 T3r = T33 + T32;
Chris@42 1075 }
Chris@42 1076 }
Chris@42 1077 {
Chris@42 1078 E T13, T3x, Ty, T3w, Ti, Tx;
Chris@42 1079 T13 = KP2_000000000 * (TN + T12);
Chris@42 1080 T3x = KP2_000000000 * (T3r + T3q);
Chris@42 1081 Ti = Ta + Th;
Chris@42 1082 Tx = KP2_000000000 * (Tp + Tw);
Chris@42 1083 Ty = Ti + Tx;
Chris@42 1084 T3w = Ti - Tx;
Chris@42 1085 R0[WS(rs, 16)] = Ty - T13;
Chris@42 1086 R0[WS(rs, 24)] = T3w + T3x;
Chris@42 1087 R0[0] = Ty + T13;
Chris@42 1088 R0[WS(rs, 8)] = T3w - T3x;
Chris@42 1089 }
Chris@42 1090 {
Chris@42 1091 E T3g, T3k, T3j, T3l;
Chris@42 1092 {
Chris@42 1093 E T3e, T3f, T3h, T3i;
Chris@42 1094 T3e = T2S + T2T;
Chris@42 1095 T3f = KP1_414213562 * (T2V + T2Y);
Chris@42 1096 T3g = T3e - T3f;
Chris@42 1097 T3k = T3e + T3f;
Chris@42 1098 T3h = T31 - T34;
Chris@42 1099 T3i = T39 - T36;
Chris@42 1100 T3j = FNMS(KP1_847759065, T3i, KP765366864 * T3h);
Chris@42 1101 T3l = FMA(KP1_847759065, T3h, KP765366864 * T3i);
Chris@42 1102 }
Chris@42 1103 R0[WS(rs, 22)] = T3g - T3j;
Chris@42 1104 R0[WS(rs, 30)] = T3k + T3l;
Chris@42 1105 R0[WS(rs, 6)] = T3g + T3j;
Chris@42 1106 R0[WS(rs, 14)] = T3k - T3l;
Chris@42 1107 }
Chris@42 1108 {
Chris@42 1109 E T3o, T3u, T3t, T3v;
Chris@42 1110 {
Chris@42 1111 E T3m, T3n, T3p, T3s;
Chris@42 1112 T3m = Ta - Th;
Chris@42 1113 T3n = KP2_000000000 * (T2X + T2W);
Chris@42 1114 T3o = T3m - T3n;
Chris@42 1115 T3u = T3m + T3n;
Chris@42 1116 T3p = TN - T12;
Chris@42 1117 T3s = T3q - T3r;
Chris@42 1118 T3t = KP1_414213562 * (T3p - T3s);
Chris@42 1119 T3v = KP1_414213562 * (T3p + T3s);
Chris@42 1120 }
Chris@42 1121 R0[WS(rs, 20)] = T3o - T3t;
Chris@42 1122 R0[WS(rs, 28)] = T3u + T3v;
Chris@42 1123 R0[WS(rs, 4)] = T3o + T3t;
Chris@42 1124 R0[WS(rs, 12)] = T3u - T3v;
Chris@42 1125 }
Chris@42 1126 {
Chris@42 1127 E T30, T3c, T3b, T3d;
Chris@42 1128 {
Chris@42 1129 E T2U, T2Z, T35, T3a;
Chris@42 1130 T2U = T2S - T2T;
Chris@42 1131 T2Z = KP1_414213562 * (T2V - T2Y);
Chris@42 1132 T30 = T2U + T2Z;
Chris@42 1133 T3c = T2U - T2Z;
Chris@42 1134 T35 = T31 + T34;
Chris@42 1135 T3a = T36 + T39;
Chris@42 1136 T3b = FNMS(KP765366864, T3a, KP1_847759065 * T35);
Chris@42 1137 T3d = FMA(KP765366864, T35, KP1_847759065 * T3a);
Chris@42 1138 }
Chris@42 1139 R0[WS(rs, 18)] = T30 - T3b;
Chris@42 1140 R0[WS(rs, 26)] = T3c + T3d;
Chris@42 1141 R0[WS(rs, 2)] = T30 + T3b;
Chris@42 1142 R0[WS(rs, 10)] = T3c - T3d;
Chris@42 1143 }
Chris@42 1144 {
Chris@42 1145 E T25, T2p, T2i, T2q, T1C, T2k, T2o, T2s, T24, T28;
Chris@42 1146 T24 = KP707106781 * (T1U + T23);
Chris@42 1147 T25 = T1L + T24;
Chris@42 1148 T2p = T1L - T24;
Chris@42 1149 T28 = KP707106781 * (T26 + T27);
Chris@42 1150 T2i = T28 + T2h;
Chris@42 1151 T2q = T2h - T28;
Chris@42 1152 {
Chris@42 1153 E T1i, T1B, T2m, T2n;
Chris@42 1154 T1i = T18 + T1h;
Chris@42 1155 T1B = FNMS(KP765366864, T1A, KP1_847759065 * T1r);
Chris@42 1156 T1C = T1i + T1B;
Chris@42 1157 T2k = T1i - T1B;
Chris@42 1158 T2m = T18 - T1h;
Chris@42 1159 T2n = FMA(KP765366864, T1r, KP1_847759065 * T1A);
Chris@42 1160 T2o = T2m - T2n;
Chris@42 1161 T2s = T2m + T2n;
Chris@42 1162 }
Chris@42 1163 {
Chris@42 1164 E T2j, T2t, T2l, T2r;
Chris@42 1165 T2j = FNMS(KP390180644, T2i, KP1_961570560 * T25);
Chris@42 1166 R0[WS(rs, 17)] = T1C - T2j;
Chris@42 1167 R0[WS(rs, 1)] = T1C + T2j;
Chris@42 1168 T2t = FMA(KP1_662939224, T2p, KP1_111140466 * T2q);
Chris@42 1169 R0[WS(rs, 13)] = T2s - T2t;
Chris@42 1170 R0[WS(rs, 29)] = T2s + T2t;
Chris@42 1171 T2l = FMA(KP390180644, T25, KP1_961570560 * T2i);
Chris@42 1172 R0[WS(rs, 9)] = T2k - T2l;
Chris@42 1173 R0[WS(rs, 25)] = T2k + T2l;
Chris@42 1174 T2r = FNMS(KP1_662939224, T2q, KP1_111140466 * T2p);
Chris@42 1175 R0[WS(rs, 21)] = T2o - T2r;
Chris@42 1176 R0[WS(rs, 5)] = T2o + T2r;
Chris@42 1177 }
Chris@42 1178 }
Chris@42 1179 {
Chris@42 1180 E T2D, T2N, T2G, T2O, T2A, T2I, T2M, T2Q, T2C, T2E;
Chris@42 1181 T2C = KP707106781 * (T27 - T26);
Chris@42 1182 T2D = T2B + T2C;
Chris@42 1183 T2N = T2B - T2C;
Chris@42 1184 T2E = KP707106781 * (T1U - T23);
Chris@42 1185 T2G = T2E + T2F;
Chris@42 1186 T2O = T2F - T2E;
Chris@42 1187 {
Chris@42 1188 E T2w, T2z, T2K, T2L;
Chris@42 1189 T2w = T2u - T2v;
Chris@42 1190 T2z = FNMS(KP1_847759065, T2y, KP765366864 * T2x);
Chris@42 1191 T2A = T2w + T2z;
Chris@42 1192 T2I = T2w - T2z;
Chris@42 1193 T2K = T2u + T2v;
Chris@42 1194 T2L = FMA(KP1_847759065, T2x, KP765366864 * T2y);
Chris@42 1195 T2M = T2K - T2L;
Chris@42 1196 T2Q = T2K + T2L;
Chris@42 1197 }
Chris@42 1198 {
Chris@42 1199 E T2H, T2R, T2J, T2P;
Chris@42 1200 T2H = FNMS(KP1_111140466, T2G, KP1_662939224 * T2D);
Chris@42 1201 R0[WS(rs, 19)] = T2A - T2H;
Chris@42 1202 R0[WS(rs, 3)] = T2A + T2H;
Chris@42 1203 T2R = FMA(KP1_961570560, T2N, KP390180644 * T2O);
Chris@42 1204 R0[WS(rs, 15)] = T2Q - T2R;
Chris@42 1205 R0[WS(rs, 31)] = T2Q + T2R;
Chris@42 1206 T2J = FMA(KP1_111140466, T2D, KP1_662939224 * T2G);
Chris@42 1207 R0[WS(rs, 11)] = T2I - T2J;
Chris@42 1208 R0[WS(rs, 27)] = T2I + T2J;
Chris@42 1209 T2P = FNMS(KP1_961570560, T2O, KP390180644 * T2N);
Chris@42 1210 R0[WS(rs, 23)] = T2M - T2P;
Chris@42 1211 R0[WS(rs, 7)] = T2M + T2P;
Chris@42 1212 }
Chris@42 1213 }
Chris@42 1214 {
Chris@42 1215 E T5p, T5T, T5w, T5U, T5I, T5W, T5P, T5X, T5s, T5v;
Chris@42 1216 T5p = T5l + T5o;
Chris@42 1217 T5T = T5l - T5o;
Chris@42 1218 T5s = T5q - T5r;
Chris@42 1219 T5v = T5t + T5u;
Chris@42 1220 T5w = FNMS(KP1_111140466, T5v, KP1_662939224 * T5s);
Chris@42 1221 T5U = FMA(KP1_111140466, T5s, KP1_662939224 * T5v);
Chris@42 1222 {
Chris@42 1223 E T5A, T5H, T5L, T5O;
Chris@42 1224 T5A = T5y - T5z;
Chris@42 1225 T5H = T5D + T5G;
Chris@42 1226 T5I = T5A + T5H;
Chris@42 1227 T5W = T5A - T5H;
Chris@42 1228 T5L = T5J - T5K;
Chris@42 1229 T5O = T5M + T5N;
Chris@42 1230 T5P = T5L + T5O;
Chris@42 1231 T5X = T5O - T5L;
Chris@42 1232 }
Chris@42 1233 {
Chris@42 1234 E T5x, T5Q, T5Z, T60;
Chris@42 1235 T5x = T5p + T5w;
Chris@42 1236 T5Q = FNMS(KP580569354, T5P, KP1_913880671 * T5I);
Chris@42 1237 R1[WS(rs, 17)] = T5x - T5Q;
Chris@42 1238 R1[WS(rs, 1)] = T5x + T5Q;
Chris@42 1239 T5Z = T5T + T5U;
Chris@42 1240 T60 = FMA(KP1_763842528, T5W, KP942793473 * T5X);
Chris@42 1241 R1[WS(rs, 13)] = T5Z - T60;
Chris@42 1242 R1[WS(rs, 29)] = T5Z + T60;
Chris@42 1243 }
Chris@42 1244 {
Chris@42 1245 E T5R, T5S, T5V, T5Y;
Chris@42 1246 T5R = T5p - T5w;
Chris@42 1247 T5S = FMA(KP580569354, T5I, KP1_913880671 * T5P);
Chris@42 1248 R1[WS(rs, 9)] = T5R - T5S;
Chris@42 1249 R1[WS(rs, 25)] = T5R + T5S;
Chris@42 1250 T5V = T5T - T5U;
Chris@42 1251 T5Y = FNMS(KP1_763842528, T5X, KP942793473 * T5W);
Chris@42 1252 R1[WS(rs, 21)] = T5V - T5Y;
Chris@42 1253 R1[WS(rs, 5)] = T5V + T5Y;
Chris@42 1254 }
Chris@42 1255 }
Chris@42 1256 {
Chris@42 1257 E T3N, T4N, T46, T4O, T4y, T4Q, T4J, T4R, T3Y, T45;
Chris@42 1258 T3N = T3F + T3M;
Chris@42 1259 T4N = T3F - T3M;
Chris@42 1260 T3Y = T3Q + T3X;
Chris@42 1261 T45 = T41 + T44;
Chris@42 1262 T46 = FNMS(KP390180644, T45, KP1_961570560 * T3Y);
Chris@42 1263 T4O = FMA(KP390180644, T3Y, KP1_961570560 * T45);
Chris@42 1264 {
Chris@42 1265 E T4i, T4x, T4B, T4I;
Chris@42 1266 T4i = T4a + T4h;
Chris@42 1267 T4x = T4p + T4w;
Chris@42 1268 T4y = T4i + T4x;
Chris@42 1269 T4Q = T4i - T4x;
Chris@42 1270 T4B = T4z + T4A;
Chris@42 1271 T4I = T4E + T4H;
Chris@42 1272 T4J = T4B + T4I;
Chris@42 1273 T4R = T4I - T4B;
Chris@42 1274 }
Chris@42 1275 {
Chris@42 1276 E T47, T4K, T4T, T4U;
Chris@42 1277 T47 = T3N + T46;
Chris@42 1278 T4K = FNMS(KP196034280, T4J, KP1_990369453 * T4y);
Chris@42 1279 R1[WS(rs, 16)] = T47 - T4K;
Chris@42 1280 R1[0] = T47 + T4K;
Chris@42 1281 T4T = T4N + T4O;
Chris@42 1282 T4U = FMA(KP1_546020906, T4Q, KP1_268786568 * T4R);
Chris@42 1283 R1[WS(rs, 12)] = T4T - T4U;
Chris@42 1284 R1[WS(rs, 28)] = T4T + T4U;
Chris@42 1285 }
Chris@42 1286 {
Chris@42 1287 E T4L, T4M, T4P, T4S;
Chris@42 1288 T4L = T3N - T46;
Chris@42 1289 T4M = FMA(KP196034280, T4y, KP1_990369453 * T4J);
Chris@42 1290 R1[WS(rs, 8)] = T4L - T4M;
Chris@42 1291 R1[WS(rs, 24)] = T4L + T4M;
Chris@42 1292 T4P = T4N - T4O;
Chris@42 1293 T4S = FNMS(KP1_546020906, T4R, KP1_268786568 * T4Q);
Chris@42 1294 R1[WS(rs, 20)] = T4P - T4S;
Chris@42 1295 R1[WS(rs, 4)] = T4P + T4S;
Chris@42 1296 }
Chris@42 1297 }
Chris@42 1298 {
Chris@42 1299 E T63, T6h, T66, T6i, T6a, T6k, T6d, T6l, T64, T65;
Chris@42 1300 T63 = T61 - T62;
Chris@42 1301 T6h = T61 + T62;
Chris@42 1302 T64 = T5q + T5r;
Chris@42 1303 T65 = T5u - T5t;
Chris@42 1304 T66 = FNMS(KP1_961570560, T65, KP390180644 * T64);
Chris@42 1305 T6i = FMA(KP1_961570560, T64, KP390180644 * T65);
Chris@42 1306 {
Chris@42 1307 E T68, T69, T6b, T6c;
Chris@42 1308 T68 = T5y + T5z;
Chris@42 1309 T69 = T5J + T5K;
Chris@42 1310 T6a = T68 - T69;
Chris@42 1311 T6k = T68 + T69;
Chris@42 1312 T6b = T5D - T5G;
Chris@42 1313 T6c = T5N - T5M;
Chris@42 1314 T6d = T6b + T6c;
Chris@42 1315 T6l = T6c - T6b;
Chris@42 1316 }
Chris@42 1317 {
Chris@42 1318 E T67, T6e, T6n, T6o;
Chris@42 1319 T67 = T63 + T66;
Chris@42 1320 T6e = FNMS(KP1_268786568, T6d, KP1_546020906 * T6a);
Chris@42 1321 R1[WS(rs, 19)] = T67 - T6e;
Chris@42 1322 R1[WS(rs, 3)] = T67 + T6e;
Chris@42 1323 T6n = T6h + T6i;
Chris@42 1324 T6o = FMA(KP1_990369453, T6k, KP196034280 * T6l);
Chris@42 1325 R1[WS(rs, 15)] = T6n - T6o;
Chris@42 1326 R1[WS(rs, 31)] = T6n + T6o;
Chris@42 1327 }
Chris@42 1328 {
Chris@42 1329 E T6f, T6g, T6j, T6m;
Chris@42 1330 T6f = T63 - T66;
Chris@42 1331 T6g = FMA(KP1_268786568, T6a, KP1_546020906 * T6d);
Chris@42 1332 R1[WS(rs, 11)] = T6f - T6g;
Chris@42 1333 R1[WS(rs, 27)] = T6f + T6g;
Chris@42 1334 T6j = T6h - T6i;
Chris@42 1335 T6m = FNMS(KP1_990369453, T6l, KP196034280 * T6k);
Chris@42 1336 R1[WS(rs, 23)] = T6j - T6m;
Chris@42 1337 R1[WS(rs, 7)] = T6j + T6m;
Chris@42 1338 }
Chris@42 1339 }
Chris@42 1340 {
Chris@42 1341 E T4X, T5b, T50, T5c, T54, T5e, T57, T5f, T4Y, T4Z;
Chris@42 1342 T4X = T4V - T4W;
Chris@42 1343 T5b = T4V + T4W;
Chris@42 1344 T4Y = T3Q - T3X;
Chris@42 1345 T4Z = T44 - T41;
Chris@42 1346 T50 = FNMS(KP1_662939224, T4Z, KP1_111140466 * T4Y);
Chris@42 1347 T5c = FMA(KP1_662939224, T4Y, KP1_111140466 * T4Z);
Chris@42 1348 {
Chris@42 1349 E T52, T53, T55, T56;
Chris@42 1350 T52 = T4a - T4h;
Chris@42 1351 T53 = T4A - T4z;
Chris@42 1352 T54 = T52 + T53;
Chris@42 1353 T5e = T52 - T53;
Chris@42 1354 T55 = T4p - T4w;
Chris@42 1355 T56 = T4H - T4E;
Chris@42 1356 T57 = T55 + T56;
Chris@42 1357 T5f = T56 - T55;
Chris@42 1358 }
Chris@42 1359 {
Chris@42 1360 E T51, T58, T5h, T5i;
Chris@42 1361 T51 = T4X + T50;
Chris@42 1362 T58 = FNMS(KP942793473, T57, KP1_763842528 * T54);
Chris@42 1363 R1[WS(rs, 18)] = T51 - T58;
Chris@42 1364 R1[WS(rs, 2)] = T51 + T58;
Chris@42 1365 T5h = T5b + T5c;
Chris@42 1366 T5i = FMA(KP1_913880671, T5e, KP580569354 * T5f);
Chris@42 1367 R1[WS(rs, 14)] = T5h - T5i;
Chris@42 1368 R1[WS(rs, 30)] = T5h + T5i;
Chris@42 1369 }
Chris@42 1370 {
Chris@42 1371 E T59, T5a, T5d, T5g;
Chris@42 1372 T59 = T4X - T50;
Chris@42 1373 T5a = FMA(KP942793473, T54, KP1_763842528 * T57);
Chris@42 1374 R1[WS(rs, 10)] = T59 - T5a;
Chris@42 1375 R1[WS(rs, 26)] = T59 + T5a;
Chris@42 1376 T5d = T5b - T5c;
Chris@42 1377 T5g = FNMS(KP1_913880671, T5f, KP580569354 * T5e);
Chris@42 1378 R1[WS(rs, 22)] = T5d - T5g;
Chris@42 1379 R1[WS(rs, 6)] = T5d + T5g;
Chris@42 1380 }
Chris@42 1381 }
Chris@42 1382 }
Chris@42 1383 }
Chris@42 1384 }
Chris@42 1385
/*
 * Descriptor for the r2cb_64 codelet; its address is passed to
 * X(kr2c_register) by the registration function that follows.
 * Initialized positionally with: 64, the name string "r2cb_64", the
 * four-entry group {342, 82, 52, 0}, and the address of GENUS.
 * NOTE(review): 64 is presumably the transform size, and the four-entry
 * group presumably holds operation counts (additions, multiplications,
 * fused multiply/adds, other) -- confirm against the kr2c_desc
 * declaration pulled in via codelet-rdft.h.
 */
Chris@42 1386 static const kr2c_desc desc = { 64, "r2cb_64", {342, 82, 52, 0}, &GENUS };
Chris@42 1387
/*
 * Registration entry point for this codelet.
 * Passes the planner p, the r2cb_64 kernel function, and the address of
 * the file-local descriptor `desc` to X(kr2c_register); returns nothing.
 * NOTE(review): X(...) is presumably the library's symbol-naming macro
 * (applied to both this function's name and kr2c_register) -- confirm in
 * the project headers.
 */
Chris@42 1388 void X(codelet_r2cb_64) (planner *p) {
Chris@42 1389 X(kr2c_register) (p, r2cb_64, &desc);
Chris@42 1390 }
Chris@42 1391
Chris@42 1392 #endif /* HAVE_FMA */