annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:08 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 394 FP additions, 196 FP multiplications,
Chris@42 32 * (or, 198 additions, 0 multiplications, 196 fused multiply/add),
Chris@42 33 * 133 stack variables, 15 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_64 (HAVE_FMA variant): generated straight-line kernel; per the
 * generator command line above it was produced by gen_r2cf with
 * "-n 64 -name r2cf_64" -- presumably the size-64 real-to-complex forward
 * transform (confirm against the FFTW codelet documentation).
 *
 * Parameters, as used by the code below:
 *   R0, R1   input arrays; each iteration reads R0[WS(rs, k)] and
 *            R1[WS(rs, k)] for every k in 0..31 (64 loads in total).
 *   Cr, Ci   output arrays; each iteration writes Cr[WS(csr, k)] for
 *            k in 0..32 and Ci[WS(csi, k)] for k in 1..31 (64 stores).
 *            Ci at index 0 and index 32 is never written here.
 *   rs       stride argument passed to WS() for the input indices.
 *   csr, csi stride arguments passed to WS() for the Cr / Ci indices.
 *   v        number of loop iterations (the loop counts i from v down to 1).
 *   ivs      amount added to R0 and R1 after each iteration.
 *   ovs      amount added to Cr and Ci after each iteration.
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from the project headers and are not defined in this file.
 * NOTE(review): FMA/FNMS/FMS look like fused multiply-add style macros;
 * their exact operand/sign conventions are not visible here -- confirm
 * before hand-editing any statement.
 */
Chris@42 37 static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Multiplier constants; each KPnnnnnnnnn name spells the leading digits of its value. */
Chris@42 39 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 40 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 41 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@42 42 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@42 43 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 44 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 45 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@42 46 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@42 47 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 48 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 49 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 50 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 51 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 52 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 53 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 54 {
Chris@42 55 INT i;
/* One pass per i: the increment clause steps the four pointers and then invokes MAKE_VOLATILE_STRIDE on each stride. */
Chris@42 56 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* T5n/T5o are declared at loop scope because they are consumed after the inner block closes (the final two Cr stores). */
Chris@42 57 E T5n, T5o;
Chris@42 58 {
Chris@42 59 E T11, T2j, T4P, T5P, T3D, T5p, T3d, Tf, T1k, T1H, T5D, T4l, T5A, T4a, T3i;
Chris@42 60 E T2U, T1R, T2e, T5K, T4G, T5H, T4v, T3l, T31, T5s, T42, T5t, T3Z, T2n, T1b;
Chris@42 61 E T3f, TZ, T5v, T3T, T5w, T3Q, T2m, T18, T3e, TK, T3K, T5Q, T4S, T5q, T14;
Chris@42 62 E T2k, T3p, Tu, T4w, T1U, T5E, T4h, T5B, T4o, T3j, T2X, T1I, T1z, T1Z, T4A;
Chris@42 63 E T24, T4x, T1X, T20;
/* First phase: all 64 input loads, interleaved with the first sums and differences. */
Chris@42 64 {
Chris@42 65 E TN, T3V, TS, TX, T3X, TQ, T40, TT;
Chris@42 66 {
Chris@42 67 E T1g, T46, T1B, T1G, T47, T1j, T4j, T1C;
Chris@42 68 {
Chris@42 69 E T4, T3z, T3, T3B, Td, T5, T8, T9;
Chris@42 70 {
Chris@42 71 E T1, T2, Tb, Tc;
Chris@42 72 T1 = R0[0];
Chris@42 73 T2 = R0[WS(rs, 16)];
Chris@42 74 Tb = R0[WS(rs, 28)];
Chris@42 75 Tc = R0[WS(rs, 12)];
Chris@42 76 T4 = R0[WS(rs, 8)];
Chris@42 77 T3z = T1 - T2;
Chris@42 78 T3 = T1 + T2;
Chris@42 79 T3B = Tb - Tc;
Chris@42 80 Td = Tb + Tc;
Chris@42 81 T5 = R0[WS(rs, 24)];
Chris@42 82 T8 = R0[WS(rs, 4)];
Chris@42 83 T9 = R0[WS(rs, 20)];
Chris@42 84 }
Chris@42 85 {
Chris@42 86 E T1E, T1F, T1h, T1i;
Chris@42 87 {
Chris@42 88 E T1e, T4N, T6, T3A, Ta, T1f;
Chris@42 89 T1e = R1[0];
Chris@42 90 T4N = T4 - T5;
Chris@42 91 T6 = T4 + T5;
Chris@42 92 T3A = T8 - T9;
Chris@42 93 Ta = T8 + T9;
Chris@42 94 T1f = R1[WS(rs, 16)];
Chris@42 95 {
Chris@42 96 E T7, T3C, T4O, Te;
Chris@42 97 T11 = T3 - T6;
Chris@42 98 T7 = T3 + T6;
Chris@42 99 T3C = T3A + T3B;
Chris@42 100 T4O = T3B - T3A;
Chris@42 101 T2j = Td - Ta;
Chris@42 102 Te = Ta + Td;
Chris@42 103 T4P = FNMS(KP707106781, T4O, T4N);
Chris@42 104 T5P = FMA(KP707106781, T4O, T4N);
Chris@42 105 T3D = FMA(KP707106781, T3C, T3z);
Chris@42 106 T5p = FNMS(KP707106781, T3C, T3z);
Chris@42 107 T3d = T7 - Te;
Chris@42 108 Tf = T7 + Te;
Chris@42 109 T1g = T1e + T1f;
Chris@42 110 T46 = T1e - T1f;
Chris@42 111 }
Chris@42 112 }
Chris@42 113 T1E = R1[WS(rs, 4)];
Chris@42 114 T1F = R1[WS(rs, 20)];
Chris@42 115 T1h = R1[WS(rs, 8)];
Chris@42 116 T1i = R1[WS(rs, 24)];
Chris@42 117 T1B = R1[WS(rs, 28)];
Chris@42 118 T1G = T1E + T1F;
Chris@42 119 T47 = T1E - T1F;
Chris@42 120 T1j = T1h + T1i;
Chris@42 121 T4j = T1h - T1i;
Chris@42 122 T1C = R1[WS(rs, 12)];
Chris@42 123 }
Chris@42 124 }
Chris@42 125 {
Chris@42 126 E T1N, T4r, T28, T2d, T4s, T1Q, T4E, T29;
Chris@42 127 {
Chris@42 128 E T2b, T2c, T1O, T1P;
Chris@42 129 {
Chris@42 130 E T2S, T48, T1D, T1L, T1M, T4k, T49, T2T;
Chris@42 131 T1L = R1[WS(rs, 31)];
Chris@42 132 T1M = R1[WS(rs, 15)];
Chris@42 133 T2S = T1g + T1j;
Chris@42 134 T1k = T1g - T1j;
Chris@42 135 T48 = T1B - T1C;
Chris@42 136 T1D = T1B + T1C;
Chris@42 137 T1N = T1L + T1M;
Chris@42 138 T4r = T1L - T1M;
Chris@42 139 T4k = T47 - T48;
Chris@42 140 T49 = T47 + T48;
Chris@42 141 T2T = T1G + T1D;
Chris@42 142 T1H = T1D - T1G;
Chris@42 143 T5D = FNMS(KP707106781, T4k, T4j);
Chris@42 144 T4l = FMA(KP707106781, T4k, T4j);
Chris@42 145 T5A = FNMS(KP707106781, T49, T46);
Chris@42 146 T4a = FMA(KP707106781, T49, T46);
Chris@42 147 T3i = T2S - T2T;
Chris@42 148 T2U = T2S + T2T;
Chris@42 149 T2b = R1[WS(rs, 3)];
Chris@42 150 T2c = R1[WS(rs, 19)];
Chris@42 151 }
Chris@42 152 T1O = R1[WS(rs, 7)];
Chris@42 153 T1P = R1[WS(rs, 23)];
Chris@42 154 T28 = R1[WS(rs, 27)];
Chris@42 155 T2d = T2b + T2c;
Chris@42 156 T4s = T2b - T2c;
Chris@42 157 T1Q = T1O + T1P;
Chris@42 158 T4E = T1P - T1O;
Chris@42 159 T29 = R1[WS(rs, 11)];
Chris@42 160 }
Chris@42 161 {
Chris@42 162 E TV, TW, TO, TP;
Chris@42 163 {
Chris@42 164 E T2Z, T4t, T2a, TL, TM, T4F, T4u, T30;
Chris@42 165 TL = R0[WS(rs, 31)];
Chris@42 166 TM = R0[WS(rs, 15)];
Chris@42 167 T2Z = T1N + T1Q;
Chris@42 168 T1R = T1N - T1Q;
Chris@42 169 T4t = T28 - T29;
Chris@42 170 T2a = T28 + T29;
Chris@42 171 TN = TL + TM;
Chris@42 172 T3V = TL - TM;
Chris@42 173 T4F = T4t - T4s;
Chris@42 174 T4u = T4s + T4t;
Chris@42 175 T30 = T2d + T2a;
Chris@42 176 T2e = T2a - T2d;
Chris@42 177 T5K = FNMS(KP707106781, T4F, T4E);
Chris@42 178 T4G = FMA(KP707106781, T4F, T4E);
Chris@42 179 T5H = FNMS(KP707106781, T4u, T4r);
Chris@42 180 T4v = FMA(KP707106781, T4u, T4r);
Chris@42 181 T3l = T2Z - T30;
Chris@42 182 T31 = T2Z + T30;
Chris@42 183 TV = R0[WS(rs, 27)];
Chris@42 184 TW = R0[WS(rs, 11)];
Chris@42 185 }
Chris@42 186 TO = R0[WS(rs, 7)];
Chris@42 187 TP = R0[WS(rs, 23)];
Chris@42 188 TS = R0[WS(rs, 3)];
Chris@42 189 TX = TV + TW;
Chris@42 190 T3X = TV - TW;
Chris@42 191 TQ = TO + TP;
Chris@42 192 T40 = TO - TP;
Chris@42 193 TT = R0[WS(rs, 19)];
Chris@42 194 }
Chris@42 195 }
Chris@42 196 }
Chris@42 197 {
Chris@42 198 E Ti, T3E, Tn, Ts, T3I, Tl, T3F, To;
Chris@42 199 {
Chris@42 200 E Ty, T3M, TD, TI, T3O, TB, T3R, TE;
Chris@42 201 {
Chris@42 202 E TG, TH, Tz, TA;
Chris@42 203 {
Chris@42 204 E T19, TR, T3W, TU, Tw, Tx;
Chris@42 205 Tw = R0[WS(rs, 1)];
Chris@42 206 Tx = R0[WS(rs, 17)];
Chris@42 207 T19 = TN - TQ;
Chris@42 208 TR = TN + TQ;
Chris@42 209 T3W = TS - TT;
Chris@42 210 TU = TS + TT;
Chris@42 211 Ty = Tw + Tx;
Chris@42 212 T3M = Tw - Tx;
Chris@42 213 {
Chris@42 214 E T41, T3Y, T1a, TY;
Chris@42 215 T41 = T3W - T3X;
Chris@42 216 T3Y = T3W + T3X;
Chris@42 217 T1a = TX - TU;
Chris@42 218 TY = TU + TX;
Chris@42 219 T5s = FNMS(KP707106781, T41, T40);
Chris@42 220 T42 = FMA(KP707106781, T41, T40);
Chris@42 221 T5t = FNMS(KP707106781, T3Y, T3V);
Chris@42 222 T3Z = FMA(KP707106781, T3Y, T3V);
Chris@42 223 T2n = FMA(KP414213562, T19, T1a);
Chris@42 224 T1b = FNMS(KP414213562, T1a, T19);
Chris@42 225 T3f = TR - TY;
Chris@42 226 TZ = TR + TY;
Chris@42 227 TG = R0[WS(rs, 29)];
Chris@42 228 TH = R0[WS(rs, 13)];
Chris@42 229 }
Chris@42 230 }
Chris@42 231 Tz = R0[WS(rs, 9)];
Chris@42 232 TA = R0[WS(rs, 25)];
Chris@42 233 TD = R0[WS(rs, 5)];
Chris@42 234 TI = TG + TH;
Chris@42 235 T3O = TG - TH;
Chris@42 236 TB = Tz + TA;
Chris@42 237 T3R = Tz - TA;
Chris@42 238 TE = R0[WS(rs, 21)];
Chris@42 239 }
Chris@42 240 {
Chris@42 241 E Tq, Tr, Tj, Tk;
Chris@42 242 {
Chris@42 243 E T16, TC, T3N, TF, Tg, Th;
Chris@42 244 Tg = R0[WS(rs, 2)];
Chris@42 245 Th = R0[WS(rs, 18)];
Chris@42 246 T16 = Ty - TB;
Chris@42 247 TC = Ty + TB;
Chris@42 248 T3N = TD - TE;
Chris@42 249 TF = TD + TE;
Chris@42 250 Ti = Tg + Th;
Chris@42 251 T3E = Tg - Th;
Chris@42 252 {
Chris@42 253 E T3S, T3P, T17, TJ;
Chris@42 254 T3S = T3N - T3O;
Chris@42 255 T3P = T3N + T3O;
Chris@42 256 T17 = TI - TF;
Chris@42 257 TJ = TF + TI;
Chris@42 258 T5v = FNMS(KP707106781, T3S, T3R);
Chris@42 259 T3T = FMA(KP707106781, T3S, T3R);
Chris@42 260 T5w = FNMS(KP707106781, T3P, T3M);
Chris@42 261 T3Q = FMA(KP707106781, T3P, T3M);
Chris@42 262 T2m = FNMS(KP414213562, T16, T17);
Chris@42 263 T18 = FMA(KP414213562, T17, T16);
Chris@42 264 T3e = TC - TJ;
Chris@42 265 TK = TC + TJ;
Chris@42 266 Tq = R0[WS(rs, 6)];
Chris@42 267 Tr = R0[WS(rs, 22)];
Chris@42 268 }
Chris@42 269 }
Chris@42 270 Tj = R0[WS(rs, 10)];
Chris@42 271 Tk = R0[WS(rs, 26)];
Chris@42 272 Tn = R0[WS(rs, 30)];
Chris@42 273 Ts = Tq + Tr;
Chris@42 274 T3I = Tq - Tr;
Chris@42 275 Tl = Tj + Tk;
Chris@42 276 T3F = Tj - Tk;
Chris@42 277 To = R0[WS(rs, 14)];
Chris@42 278 }
Chris@42 279 }
Chris@42 280 {
Chris@42 281 E T1n, T4b, T1s, T4f, T1x, T4c, T1q, T1t;
Chris@42 282 {
Chris@42 283 E T1v, T1w, T1o, T1p;
Chris@42 284 {
Chris@42 285 E T1l, T4Q, T3G, Tm, T12, Tp, T3H, T1m;
Chris@42 286 T1l = R1[WS(rs, 2)];
Chris@42 287 T4Q = FMA(KP414213562, T3E, T3F);
Chris@42 288 T3G = FNMS(KP414213562, T3F, T3E);
Chris@42 289 Tm = Ti + Tl;
Chris@42 290 T12 = Ti - Tl;
Chris@42 291 Tp = Tn + To;
Chris@42 292 T3H = Tn - To;
Chris@42 293 T1m = R1[WS(rs, 18)];
Chris@42 294 T1v = R1[WS(rs, 6)];
Chris@42 295 {
Chris@42 296 E T4R, T3J, Tt, T13;
Chris@42 297 T4R = FNMS(KP414213562, T3H, T3I);
Chris@42 298 T3J = FMA(KP414213562, T3I, T3H);
Chris@42 299 Tt = Tp + Ts;
Chris@42 300 T13 = Tp - Ts;
Chris@42 301 T1n = T1l + T1m;
Chris@42 302 T4b = T1l - T1m;
Chris@42 303 T3K = T3G + T3J;
Chris@42 304 T5Q = T3J - T3G;
Chris@42 305 T4S = T4Q + T4R;
Chris@42 306 T5q = T4Q - T4R;
Chris@42 307 T14 = T12 + T13;
Chris@42 308 T2k = T13 - T12;
Chris@42 309 T3p = Tt - Tm;
Chris@42 310 Tu = Tm + Tt;
Chris@42 311 T1w = R1[WS(rs, 22)];
Chris@42 312 }
Chris@42 313 }
Chris@42 314 T1o = R1[WS(rs, 10)];
Chris@42 315 T1p = R1[WS(rs, 26)];
Chris@42 316 T1s = R1[WS(rs, 30)];
Chris@42 317 T4f = T1v - T1w;
Chris@42 318 T1x = T1v + T1w;
Chris@42 319 T4c = T1o - T1p;
Chris@42 320 T1q = T1o + T1p;
Chris@42 321 T1t = R1[WS(rs, 14)];
Chris@42 322 }
Chris@42 323 {
Chris@42 324 E T22, T23, T1V, T1W;
Chris@42 325 {
Chris@42 326 E T1S, T4d, T4m, T2V, T1r, T4e, T1u, T1T;
Chris@42 327 T1S = R1[WS(rs, 1)];
Chris@42 328 T4d = FNMS(KP414213562, T4c, T4b);
Chris@42 329 T4m = FMA(KP414213562, T4b, T4c);
Chris@42 330 T2V = T1n + T1q;
Chris@42 331 T1r = T1n - T1q;
Chris@42 332 T4e = T1s - T1t;
Chris@42 333 T1u = T1s + T1t;
Chris@42 334 T1T = R1[WS(rs, 17)];
Chris@42 335 T22 = R1[WS(rs, 5)];
Chris@42 336 {
Chris@42 337 E T4g, T4n, T2W, T1y;
Chris@42 338 T4g = FMA(KP414213562, T4f, T4e);
Chris@42 339 T4n = FNMS(KP414213562, T4e, T4f);
Chris@42 340 T2W = T1u + T1x;
Chris@42 341 T1y = T1u - T1x;
Chris@42 342 T4w = T1S - T1T;
Chris@42 343 T1U = T1S + T1T;
Chris@42 344 T5E = T4g - T4d;
Chris@42 345 T4h = T4d + T4g;
Chris@42 346 T5B = T4m - T4n;
Chris@42 347 T4o = T4m + T4n;
Chris@42 348 T3j = T2W - T2V;
Chris@42 349 T2X = T2V + T2W;
Chris@42 350 T1I = T1y - T1r;
Chris@42 351 T1z = T1r + T1y;
Chris@42 352 T23 = R1[WS(rs, 21)];
Chris@42 353 }
Chris@42 354 }
Chris@42 355 T1V = R1[WS(rs, 9)];
Chris@42 356 T1W = R1[WS(rs, 25)];
Chris@42 357 T1Z = R1[WS(rs, 29)];
Chris@42 358 T4A = T23 - T22;
Chris@42 359 T24 = T22 + T23;
Chris@42 360 T4x = T1W - T1V;
Chris@42 361 T1X = T1V + T1W;
Chris@42 362 T20 = R1[WS(rs, 13)];
Chris@42 363 }
Chris@42 364 }
Chris@42 365 }
Chris@42 366 }
/* Second phase: no further loads; remaining combinations are formed and every Cr/Ci output is stored. */
Chris@42 367 {
Chris@42 368 E T4C, T5L, T4J, T5I, T26, T2f, T3q, T3h, T3w, T3s, T3o, T3r, T3t;
Chris@42 369 {
Chris@42 370 E T2R, T37, T2Y, T3a, T39, T3m, T3b, T35, Tv, T10, T34, T3c, T3x, T3y;
Chris@42 371 {
Chris@42 372 E T4y, T4H, T32, T1Y, T4z, T21;
Chris@42 373 T2R = Tf - Tu;
Chris@42 374 Tv = Tf + Tu;
Chris@42 375 T4y = FMA(KP414213562, T4x, T4w);
Chris@42 376 T4H = FNMS(KP414213562, T4w, T4x);
Chris@42 377 T32 = T1U + T1X;
Chris@42 378 T1Y = T1U - T1X;
Chris@42 379 T4z = T1Z - T20;
Chris@42 380 T21 = T1Z + T20;
Chris@42 381 T10 = TK + TZ;
Chris@42 382 T37 = TZ - TK;
Chris@42 383 T2Y = T2U - T2X;
Chris@42 384 T3a = T2U + T2X;
Chris@42 385 {
Chris@42 386 E T4B, T4I, T33, T25;
Chris@42 387 T4B = FNMS(KP414213562, T4A, T4z);
Chris@42 388 T4I = FMA(KP414213562, T4z, T4A);
Chris@42 389 T33 = T21 + T24;
Chris@42 390 T25 = T21 - T24;
Chris@42 391 T39 = Tv + T10;
Chris@42 392 T4C = T4y + T4B;
Chris@42 393 T5L = T4B - T4y;
Chris@42 394 T4J = T4H + T4I;
Chris@42 395 T5I = T4I - T4H;
Chris@42 396 T34 = T32 + T33;
Chris@42 397 T3m = T33 - T32;
Chris@42 398 T26 = T1Y + T25;
Chris@42 399 T2f = T25 - T1Y;
Chris@42 400 }
Chris@42 401 }
Chris@42 402 Cr[WS(csr, 16)] = Tv - T10;
Chris@42 403 T3b = T31 + T34;
Chris@42 404 T35 = T31 - T34;
Chris@42 405 Ci[WS(csi, 16)] = T3b - T3a;
Chris@42 406 T3c = T3a + T3b;
Chris@42 407 {
Chris@42 408 E T3k, T3u, T3v, T3n, T36, T38, T3g;
Chris@42 409 T3g = T3e + T3f;
Chris@42 410 T3q = T3f - T3e;
Chris@42 411 Cr[0] = T39 + T3c;
Chris@42 412 Cr[WS(csr, 32)] = T39 - T3c;
Chris@42 413 T36 = T2Y + T35;
Chris@42 414 T38 = T35 - T2Y;
Chris@42 415 T3x = FNMS(KP707106781, T3g, T3d);
Chris@42 416 T3h = FMA(KP707106781, T3g, T3d);
Chris@42 417 Ci[WS(csi, 8)] = FMA(KP707106781, T38, T37);
Chris@42 418 Ci[WS(csi, 24)] = FMS(KP707106781, T38, T37);
Chris@42 419 Cr[WS(csr, 8)] = FMA(KP707106781, T36, T2R);
Chris@42 420 Cr[WS(csr, 24)] = FNMS(KP707106781, T36, T2R);
Chris@42 421 T3k = FMA(KP414213562, T3j, T3i);
Chris@42 422 T3u = FNMS(KP414213562, T3i, T3j);
Chris@42 423 T3v = FMA(KP414213562, T3l, T3m);
Chris@42 424 T3n = FNMS(KP414213562, T3m, T3l);
Chris@42 425 T3y = T3v - T3u;
Chris@42 426 T3w = T3u + T3v;
Chris@42 427 T3s = T3n - T3k;
Chris@42 428 T3o = T3k + T3n;
Chris@42 429 }
Chris@42 430 Cr[WS(csr, 12)] = FMA(KP923879532, T3y, T3x);
Chris@42 431 Cr[WS(csr, 20)] = FNMS(KP923879532, T3y, T3x);
Chris@42 432 }
Chris@42 433 Cr[WS(csr, 4)] = FMA(KP923879532, T3o, T3h);
Chris@42 434 Cr[WS(csr, 28)] = FNMS(KP923879532, T3o, T3h);
Chris@42 435 T3r = FNMS(KP707106781, T3q, T3p);
Chris@42 436 T3t = FMA(KP707106781, T3q, T3p);
Chris@42 437 {
Chris@42 438 E T27, T2g, T2v, T1d, T2r, T2p, T2s, T1K, T6l, T6m;
Chris@42 439 {
Chris@42 440 E T15, T2o, T2P, T2z, T2l, T1c, T1A, T1J, T2D, T2L, T2J, T2M, T2C, T2E, T2N;
Chris@42 441 E T2F;
Chris@42 442 {
Chris@42 443 E T2H, T2I, T2x, T2y, T2A, T2B;
Chris@42 444 T15 = FMA(KP707106781, T14, T11);
Chris@42 445 T2x = FNMS(KP707106781, T14, T11);
Chris@42 446 T2y = T2n - T2m;
Chris@42 447 T2o = T2m + T2n;
Chris@42 448 Ci[WS(csi, 4)] = FMA(KP923879532, T3w, T3t);
Chris@42 449 Ci[WS(csi, 28)] = FMS(KP923879532, T3w, T3t);
Chris@42 450 Ci[WS(csi, 20)] = FMA(KP923879532, T3s, T3r);
Chris@42 451 Ci[WS(csi, 12)] = FMS(KP923879532, T3s, T3r);
Chris@42 452 T2P = FNMS(KP923879532, T2y, T2x);
Chris@42 453 T2z = FMA(KP923879532, T2y, T2x);
Chris@42 454 T2l = FMA(KP707106781, T2k, T2j);
Chris@42 455 T2H = FNMS(KP707106781, T2k, T2j);
Chris@42 456 T2I = T1b - T18;
Chris@42 457 T1c = T18 + T1b;
Chris@42 458 T1A = FMA(KP707106781, T1z, T1k);
Chris@42 459 T2A = FNMS(KP707106781, T1z, T1k);
Chris@42 460 T2B = FNMS(KP707106781, T1I, T1H);
Chris@42 461 T1J = FMA(KP707106781, T1I, T1H);
Chris@42 462 T27 = FMA(KP707106781, T26, T1R);
Chris@42 463 T2D = FNMS(KP707106781, T26, T1R);
Chris@42 464 T2L = FNMS(KP923879532, T2I, T2H);
Chris@42 465 T2J = FMA(KP923879532, T2I, T2H);
Chris@42 466 T2M = FMA(KP668178637, T2A, T2B);
Chris@42 467 T2C = FNMS(KP668178637, T2B, T2A);
Chris@42 468 T2E = FNMS(KP707106781, T2f, T2e);
Chris@42 469 T2g = FMA(KP707106781, T2f, T2e);
Chris@42 470 }
Chris@42 471 T2N = FNMS(KP668178637, T2D, T2E);
Chris@42 472 T2F = FMA(KP668178637, T2E, T2D);
Chris@42 473 T2v = FNMS(KP923879532, T1c, T15);
Chris@42 474 T1d = FMA(KP923879532, T1c, T15);
Chris@42 475 {
Chris@42 476 E T2Q, T2O, T2K, T2G;
Chris@42 477 T2Q = T2M - T2N;
Chris@42 478 T2O = T2M + T2N;
Chris@42 479 T2K = T2F - T2C;
Chris@42 480 T2G = T2C + T2F;
Chris@42 481 Cr[WS(csr, 10)] = FMA(KP831469612, T2Q, T2P);
Chris@42 482 Cr[WS(csr, 22)] = FNMS(KP831469612, T2Q, T2P);
Chris@42 483 Ci[WS(csi, 26)] = FNMS(KP831469612, T2O, T2L);
Chris@42 484 Ci[WS(csi, 6)] = -(FMA(KP831469612, T2O, T2L));
Chris@42 485 Ci[WS(csi, 22)] = FMS(KP831469612, T2K, T2J);
Chris@42 486 Ci[WS(csi, 10)] = FMA(KP831469612, T2K, T2J);
Chris@42 487 Cr[WS(csr, 6)] = FMA(KP831469612, T2G, T2z);
Chris@42 488 Cr[WS(csr, 26)] = FNMS(KP831469612, T2G, T2z);
Chris@42 489 }
Chris@42 490 T2r = FMA(KP923879532, T2o, T2l);
Chris@42 491 T2p = FNMS(KP923879532, T2o, T2l);
Chris@42 492 T2s = FNMS(KP198912367, T1A, T1J);
Chris@42 493 T1K = FMA(KP198912367, T1J, T1A);
Chris@42 494 }
Chris@42 495 {
Chris@42 496 E T63, T5r, T5R, T6d, T5J, T5M, T6e, T5y, T6j, T6b, T66, T67, T64, T5U, T5Z;
Chris@42 497 E T5G;
Chris@42 498 {
Chris@42 499 E T5S, T5u, T5x, T5T, T2t, T2h;
Chris@42 500 T63 = FMA(KP923879532, T5q, T5p);
Chris@42 501 T5r = FNMS(KP923879532, T5q, T5p);
Chris@42 502 T5R = FNMS(KP923879532, T5Q, T5P);
Chris@42 503 T6d = FMA(KP923879532, T5Q, T5P);
Chris@42 504 T2t = FMA(KP198912367, T27, T2g);
Chris@42 505 T2h = FNMS(KP198912367, T2g, T27);
Chris@42 506 T5S = FNMS(KP668178637, T5s, T5t);
Chris@42 507 T5u = FMA(KP668178637, T5t, T5s);
Chris@42 508 {
Chris@42 509 E T2w, T2u, T2q, T2i;
Chris@42 510 T2w = T2t - T2s;
Chris@42 511 T2u = T2s + T2t;
Chris@42 512 T2q = T2h - T1K;
Chris@42 513 T2i = T1K + T2h;
Chris@42 514 Cr[WS(csr, 14)] = FMA(KP980785280, T2w, T2v);
Chris@42 515 Cr[WS(csr, 18)] = FNMS(KP980785280, T2w, T2v);
Chris@42 516 Ci[WS(csi, 30)] = FMS(KP980785280, T2u, T2r);
Chris@42 517 Ci[WS(csi, 2)] = FMA(KP980785280, T2u, T2r);
Chris@42 518 Ci[WS(csi, 18)] = FMA(KP980785280, T2q, T2p);
Chris@42 519 Ci[WS(csi, 14)] = FMS(KP980785280, T2q, T2p);
Chris@42 520 Cr[WS(csr, 2)] = FMA(KP980785280, T2i, T1d);
Chris@42 521 Cr[WS(csr, 30)] = FNMS(KP980785280, T2i, T1d);
Chris@42 522 T5x = FNMS(KP668178637, T5w, T5v);
Chris@42 523 T5T = FMA(KP668178637, T5v, T5w);
Chris@42 524 }
Chris@42 525 {
Chris@42 526 E T69, T6a, T5C, T5F;
Chris@42 527 T5J = FNMS(KP923879532, T5I, T5H);
Chris@42 528 T69 = FMA(KP923879532, T5I, T5H);
Chris@42 529 T6a = FNMS(KP923879532, T5L, T5K);
Chris@42 530 T5M = FMA(KP923879532, T5L, T5K);
Chris@42 531 T6e = T5x + T5u;
Chris@42 532 T5y = T5u - T5x;
Chris@42 533 T6j = FNMS(KP303346683, T69, T6a);
Chris@42 534 T6b = FMA(KP303346683, T6a, T69);
Chris@42 535 T66 = FMA(KP923879532, T5B, T5A);
Chris@42 536 T5C = FNMS(KP923879532, T5B, T5A);
Chris@42 537 T5F = FNMS(KP923879532, T5E, T5D);
Chris@42 538 T67 = FMA(KP923879532, T5E, T5D);
Chris@42 539 T64 = T5T + T5S;
Chris@42 540 T5U = T5S - T5T;
Chris@42 541 T5Z = FMA(KP534511135, T5C, T5F);
Chris@42 542 T5G = FNMS(KP534511135, T5F, T5C);
Chris@42 543 }
Chris@42 544 }
Chris@42 545 {
Chris@42 546 E T61, T6i, T68, T62;
Chris@42 547 {
Chris@42 548 E T5z, T5Y, T5N, T5X, T5V, T60, T5W, T5O;
Chris@42 549 T61 = FNMS(KP831469612, T5y, T5r);
Chris@42 550 T5z = FMA(KP831469612, T5y, T5r);
Chris@42 551 T6i = FNMS(KP303346683, T66, T67);
Chris@42 552 T68 = FMA(KP303346683, T67, T66);
Chris@42 553 T5Y = FMA(KP534511135, T5J, T5M);
Chris@42 554 T5N = FNMS(KP534511135, T5M, T5J);
Chris@42 555 T5X = FNMS(KP831469612, T5U, T5R);
Chris@42 556 T5V = FMA(KP831469612, T5U, T5R);
Chris@42 557 T60 = T5Y - T5Z;
Chris@42 558 T62 = T5Z + T5Y;
Chris@42 559 T5W = T5N - T5G;
Chris@42 560 T5O = T5G + T5N;
Chris@42 561 Ci[WS(csi, 27)] = FMA(KP881921264, T60, T5X);
Chris@42 562 Ci[WS(csi, 5)] = FMS(KP881921264, T60, T5X);
Chris@42 563 Cr[WS(csr, 5)] = FMA(KP881921264, T5O, T5z);
Chris@42 564 Cr[WS(csr, 27)] = FNMS(KP881921264, T5O, T5z);
Chris@42 565 Ci[WS(csi, 21)] = FMS(KP881921264, T5W, T5V);
Chris@42 566 Ci[WS(csi, 11)] = FMA(KP881921264, T5W, T5V);
Chris@42 567 }
Chris@42 568 {
Chris@42 569 E T6g, T6f, T6h, T6k, T65, T6c;
Chris@42 570 T6l = FNMS(KP831469612, T64, T63);
Chris@42 571 T65 = FMA(KP831469612, T64, T63);
Chris@42 572 T6c = T68 + T6b;
Chris@42 573 T6g = T6b - T68;
Chris@42 574 T6f = FNMS(KP831469612, T6e, T6d);
Chris@42 575 T6h = FMA(KP831469612, T6e, T6d);
Chris@42 576 Cr[WS(csr, 11)] = FMA(KP881921264, T62, T61);
Chris@42 577 Cr[WS(csr, 21)] = FNMS(KP881921264, T62, T61);
Chris@42 578 Cr[WS(csr, 3)] = FMA(KP956940335, T6c, T65);
Chris@42 579 Cr[WS(csr, 29)] = FNMS(KP956940335, T6c, T65);
Chris@42 580 T6k = T6i - T6j;
Chris@42 581 T6m = T6i + T6j;
Chris@42 582 Ci[WS(csi, 29)] = FMS(KP956940335, T6k, T6h);
Chris@42 583 Ci[WS(csi, 3)] = FMA(KP956940335, T6k, T6h);
Chris@42 584 Ci[WS(csi, 19)] = FMA(KP956940335, T6g, T6f);
Chris@42 585 Ci[WS(csi, 13)] = FMS(KP956940335, T6g, T6f);
Chris@42 586 }
Chris@42 587 }
Chris@42 588 }
Chris@42 589 {
Chris@42 590 E T55, T3L, T4T, T5f, T4D, T4K, T5g, T44, T5l, T5d, T58, T59, T56, T4W, T51;
Chris@42 591 E T4q;
Chris@42 592 {
Chris@42 593 E T4U, T3U, T43, T4V;
Chris@42 594 T55 = FNMS(KP923879532, T3K, T3D);
Chris@42 595 T3L = FMA(KP923879532, T3K, T3D);
Chris@42 596 T4T = FMA(KP923879532, T4S, T4P);
Chris@42 597 T5f = FNMS(KP923879532, T4S, T4P);
Chris@42 598 Cr[WS(csr, 13)] = FNMS(KP956940335, T6m, T6l);
Chris@42 599 Cr[WS(csr, 19)] = FMA(KP956940335, T6m, T6l);
Chris@42 600 T4U = FMA(KP198912367, T3Q, T3T);
Chris@42 601 T3U = FNMS(KP198912367, T3T, T3Q);
Chris@42 602 T43 = FMA(KP198912367, T42, T3Z);
Chris@42 603 T4V = FNMS(KP198912367, T3Z, T42);
Chris@42 604 {
Chris@42 605 E T5b, T5c, T4i, T4p;
Chris@42 606 T4D = FMA(KP923879532, T4C, T4v);
Chris@42 607 T5b = FNMS(KP923879532, T4C, T4v);
Chris@42 608 T5c = FNMS(KP923879532, T4J, T4G);
Chris@42 609 T4K = FMA(KP923879532, T4J, T4G);
Chris@42 610 T5g = T43 - T3U;
Chris@42 611 T44 = T3U + T43;
Chris@42 612 T5l = FNMS(KP820678790, T5b, T5c);
Chris@42 613 T5d = FMA(KP820678790, T5c, T5b);
Chris@42 614 T58 = FNMS(KP923879532, T4h, T4a);
Chris@42 615 T4i = FMA(KP923879532, T4h, T4a);
Chris@42 616 T4p = FMA(KP923879532, T4o, T4l);
Chris@42 617 T59 = FNMS(KP923879532, T4o, T4l);
Chris@42 618 T56 = T4U - T4V;
Chris@42 619 T4W = T4U + T4V;
Chris@42 620 T51 = FMA(KP098491403, T4i, T4p);
Chris@42 621 T4q = FNMS(KP098491403, T4p, T4i);
Chris@42 622 }
Chris@42 623 }
Chris@42 624 {
Chris@42 625 E T53, T5k, T5a, T54;
Chris@42 626 {
Chris@42 627 E T45, T50, T4L, T4Z, T4X, T52, T4Y, T4M;
Chris@42 628 T53 = FNMS(KP980785280, T44, T3L);
Chris@42 629 T45 = FMA(KP980785280, T44, T3L);
Chris@42 630 T5k = FNMS(KP820678790, T58, T59);
Chris@42 631 T5a = FMA(KP820678790, T59, T58);
Chris@42 632 T50 = FMA(KP098491403, T4D, T4K);
Chris@42 633 T4L = FNMS(KP098491403, T4K, T4D);
Chris@42 634 T4Z = FMA(KP980785280, T4W, T4T);
Chris@42 635 T4X = FNMS(KP980785280, T4W, T4T);
Chris@42 636 T52 = T50 - T51;
Chris@42 637 T54 = T51 + T50;
Chris@42 638 T4Y = T4L - T4q;
Chris@42 639 T4M = T4q + T4L;
Chris@42 640 Ci[WS(csi, 31)] = FMA(KP995184726, T52, T4Z);
Chris@42 641 Ci[WS(csi, 1)] = FMS(KP995184726, T52, T4Z);
Chris@42 642 Cr[WS(csr, 1)] = FMA(KP995184726, T4M, T45);
Chris@42 643 Cr[WS(csr, 31)] = FNMS(KP995184726, T4M, T45);
Chris@42 644 Ci[WS(csi, 17)] = FMS(KP995184726, T4Y, T4X);
Chris@42 645 Ci[WS(csi, 15)] = FMA(KP995184726, T4Y, T4X);
Chris@42 646 }
Chris@42 647 {
Chris@42 648 E T5i, T5h, T5j, T5m, T57, T5e;
Chris@42 649 T5n = FNMS(KP980785280, T56, T55);
Chris@42 650 T57 = FMA(KP980785280, T56, T55);
Chris@42 651 T5e = T5a + T5d;
Chris@42 652 T5i = T5d - T5a;
Chris@42 653 T5h = FNMS(KP980785280, T5g, T5f);
Chris@42 654 T5j = FMA(KP980785280, T5g, T5f);
Chris@42 655 Cr[WS(csr, 15)] = FMA(KP995184726, T54, T53);
Chris@42 656 Cr[WS(csr, 17)] = FNMS(KP995184726, T54, T53);
Chris@42 657 Cr[WS(csr, 7)] = FMA(KP773010453, T5e, T57);
Chris@42 658 Cr[WS(csr, 25)] = FNMS(KP773010453, T5e, T57);
Chris@42 659 T5m = T5k - T5l;
Chris@42 660 T5o = T5k + T5l;
Chris@42 661 Ci[WS(csi, 25)] = FMS(KP773010453, T5m, T5j);
Chris@42 662 Ci[WS(csi, 7)] = FMA(KP773010453, T5m, T5j);
Chris@42 663 Ci[WS(csi, 23)] = FMA(KP773010453, T5i, T5h);
Chris@42 664 Ci[WS(csi, 9)] = FMS(KP773010453, T5i, T5h);
Chris@42 665 }
Chris@42 666 }
Chris@42 667 }
Chris@42 668 }
Chris@42 669 }
Chris@42 670 }
/* Final two stores of the iteration, using the loop-scope temporaries T5n and T5o set inside the block above. */
Chris@42 671 Cr[WS(csr, 9)] = FNMS(KP773010453, T5o, T5n);
Chris@42 672 Cr[WS(csr, 23)] = FMA(KP773010453, T5o, T5n);
Chris@42 673 }
Chris@42 674 }
Chris@42 675 }
Chris@42 676
/*
 * Descriptor handed to the planner together with r2cf_64: size 64, the
 * codelet name, and an operation-count tuple. The first three counts match
 * the generated header comment for this variant (198 additions,
 * 0 multiplications, 196 fused multiply/add); the meaning of the fourth
 * field and of GENUS is defined in the project headers, not visible here.
 */
Chris@42 677 static const kr2c_desc desc = { 64, "r2cf_64", {198, 0, 196, 0}, &GENUS };
Chris@42 678
/*
 * Registration entry point: passes the r2cf_64 kernel and its descriptor
 * to X(kr2c_register) for the given planner p.
 * X() is a macro from the project headers (presumably a symbol-name
 * wrapper -- confirm); its expansion is not visible in this file.
 */
Chris@42 679 void X(codelet_r2cf_64) (planner *p) {
Chris@42 680 X(kr2c_register) (p, r2cf_64, &desc);
Chris@42 681 }
Chris@42 682
Chris@42 683 #else /* HAVE_FMA */
Chris@42 684
Chris@42 685 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include r2cf.h */
Chris@42 686
Chris@42 687 /*
Chris@42 688 * This function contains 394 FP additions, 124 FP multiplications,
Chris@42 689 * (or, 342 additions, 72 multiplications, 52 fused multiply/add),
Chris@42 690 * 106 stack variables, 15 constants, and 128 memory accesses
Chris@42 691 */
Chris@42 692 #include "r2cf.h"
Chris@42 693
Chris@42 694 static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 695 {
Chris@42 696 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 697 DK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@42 698 DK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@42 699 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 700 DK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@42 701 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 702 DK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@42 703 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 704 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 705 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 706 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 707 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 708 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 709 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 710 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 711 {
Chris@42 712 INT i;
Chris@42 713 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
Chris@42 714 E T4l, T5a, T15, T3n, T2T, T3Q, T7, Te, Tf, T4A, T4L, T1X, T3B, T23, T3y;
Chris@42 715 E T5I, T66, T4R, T52, T2j, T3F, T2H, T3I, T5P, T69, T1i, T3t, T1l, T3u, TZ;
Chris@42 716 E T63, T4v, T58, T1r, T3r, T1u, T3q, TK, T62, T4s, T57, Tm, Tt, Tu, T4o;
Chris@42 717 E T5b, T1c, T3R, T2Q, T3o, T1M, T3z, T5L, T67, T26, T3C, T4H, T4M, T2y, T3J;
Chris@42 718 E T5S, T6a, T2C, T3G, T4Y, T53;
Chris@42 719 {
Chris@42 720 E T3, T11, Td, T13, T6, T2S, Ta, T12, T14, T2R;
Chris@42 721 {
Chris@42 722 E T1, T2, Tb, Tc;
Chris@42 723 T1 = R0[0];
Chris@42 724 T2 = R0[WS(rs, 16)];
Chris@42 725 T3 = T1 + T2;
Chris@42 726 T11 = T1 - T2;
Chris@42 727 Tb = R0[WS(rs, 28)];
Chris@42 728 Tc = R0[WS(rs, 12)];
Chris@42 729 Td = Tb + Tc;
Chris@42 730 T13 = Tb - Tc;
Chris@42 731 }
Chris@42 732 {
Chris@42 733 E T4, T5, T8, T9;
Chris@42 734 T4 = R0[WS(rs, 8)];
Chris@42 735 T5 = R0[WS(rs, 24)];
Chris@42 736 T6 = T4 + T5;
Chris@42 737 T2S = T4 - T5;
Chris@42 738 T8 = R0[WS(rs, 4)];
Chris@42 739 T9 = R0[WS(rs, 20)];
Chris@42 740 Ta = T8 + T9;
Chris@42 741 T12 = T8 - T9;
Chris@42 742 }
Chris@42 743 T4l = T3 - T6;
Chris@42 744 T5a = Td - Ta;
Chris@42 745 T14 = KP707106781 * (T12 + T13);
Chris@42 746 T15 = T11 + T14;
Chris@42 747 T3n = T11 - T14;
Chris@42 748 T2R = KP707106781 * (T13 - T12);
Chris@42 749 T2T = T2R - T2S;
Chris@42 750 T3Q = T2S + T2R;
Chris@42 751 T7 = T3 + T6;
Chris@42 752 Te = Ta + Td;
Chris@42 753 Tf = T7 + Te;
Chris@42 754 }
Chris@42 755 {
Chris@42 756 E T1P, T4J, T21, T4y, T1S, T4K, T1W, T4z;
Chris@42 757 {
Chris@42 758 E T1N, T1O, T1Z, T20;
Chris@42 759 T1N = R1[WS(rs, 28)];
Chris@42 760 T1O = R1[WS(rs, 12)];
Chris@42 761 T1P = T1N - T1O;
Chris@42 762 T4J = T1N + T1O;
Chris@42 763 T1Z = R1[0];
Chris@42 764 T20 = R1[WS(rs, 16)];
Chris@42 765 T21 = T1Z - T20;
Chris@42 766 T4y = T1Z + T20;
Chris@42 767 }
Chris@42 768 {
Chris@42 769 E T1Q, T1R, T1U, T1V;
Chris@42 770 T1Q = R1[WS(rs, 4)];
Chris@42 771 T1R = R1[WS(rs, 20)];
Chris@42 772 T1S = T1Q - T1R;
Chris@42 773 T4K = T1Q + T1R;
Chris@42 774 T1U = R1[WS(rs, 8)];
Chris@42 775 T1V = R1[WS(rs, 24)];
Chris@42 776 T1W = T1U - T1V;
Chris@42 777 T4z = T1U + T1V;
Chris@42 778 }
Chris@42 779 T4A = T4y - T4z;
Chris@42 780 T4L = T4J - T4K;
Chris@42 781 {
Chris@42 782 E T1T, T22, T5G, T5H;
Chris@42 783 T1T = KP707106781 * (T1P - T1S);
Chris@42 784 T1X = T1T - T1W;
Chris@42 785 T3B = T1W + T1T;
Chris@42 786 T22 = KP707106781 * (T1S + T1P);
Chris@42 787 T23 = T21 + T22;
Chris@42 788 T3y = T21 - T22;
Chris@42 789 T5G = T4y + T4z;
Chris@42 790 T5H = T4K + T4J;
Chris@42 791 T5I = T5G + T5H;
Chris@42 792 T66 = T5G - T5H;
Chris@42 793 }
Chris@42 794 }
Chris@42 795 {
Chris@42 796 E T2b, T4P, T2G, T4Q, T2e, T51, T2h, T50;
Chris@42 797 {
Chris@42 798 E T29, T2a, T2E, T2F;
Chris@42 799 T29 = R1[WS(rs, 31)];
Chris@42 800 T2a = R1[WS(rs, 15)];
Chris@42 801 T2b = T29 - T2a;
Chris@42 802 T4P = T29 + T2a;
Chris@42 803 T2E = R1[WS(rs, 7)];
Chris@42 804 T2F = R1[WS(rs, 23)];
Chris@42 805 T2G = T2E - T2F;
Chris@42 806 T4Q = T2E + T2F;
Chris@42 807 }
Chris@42 808 {
Chris@42 809 E T2c, T2d, T2f, T2g;
Chris@42 810 T2c = R1[WS(rs, 3)];
Chris@42 811 T2d = R1[WS(rs, 19)];
Chris@42 812 T2e = T2c - T2d;
Chris@42 813 T51 = T2c + T2d;
Chris@42 814 T2f = R1[WS(rs, 27)];
Chris@42 815 T2g = R1[WS(rs, 11)];
Chris@42 816 T2h = T2f - T2g;
Chris@42 817 T50 = T2f + T2g;
Chris@42 818 }
Chris@42 819 T4R = T4P - T4Q;
Chris@42 820 T52 = T50 - T51;
Chris@42 821 {
Chris@42 822 E T2i, T2D, T5N, T5O;
Chris@42 823 T2i = KP707106781 * (T2e + T2h);
Chris@42 824 T2j = T2b + T2i;
Chris@42 825 T3F = T2b - T2i;
Chris@42 826 T2D = KP707106781 * (T2h - T2e);
Chris@42 827 T2H = T2D - T2G;
Chris@42 828 T3I = T2G + T2D;
Chris@42 829 T5N = T4P + T4Q;
Chris@42 830 T5O = T51 + T50;
Chris@42 831 T5P = T5N + T5O;
Chris@42 832 T69 = T5N - T5O;
Chris@42 833 }
Chris@42 834 }
Chris@42 835 {
Chris@42 836 E TN, T1e, TX, T1g, TQ, T1k, TU, T1f, T1h, T1j;
Chris@42 837 {
Chris@42 838 E TL, TM, TV, TW;
Chris@42 839 TL = R0[WS(rs, 31)];
Chris@42 840 TM = R0[WS(rs, 15)];
Chris@42 841 TN = TL + TM;
Chris@42 842 T1e = TL - TM;
Chris@42 843 TV = R0[WS(rs, 27)];
Chris@42 844 TW = R0[WS(rs, 11)];
Chris@42 845 TX = TV + TW;
Chris@42 846 T1g = TV - TW;
Chris@42 847 }
Chris@42 848 {
Chris@42 849 E TO, TP, TS, TT;
Chris@42 850 TO = R0[WS(rs, 7)];
Chris@42 851 TP = R0[WS(rs, 23)];
Chris@42 852 TQ = TO + TP;
Chris@42 853 T1k = TO - TP;
Chris@42 854 TS = R0[WS(rs, 3)];
Chris@42 855 TT = R0[WS(rs, 19)];
Chris@42 856 TU = TS + TT;
Chris@42 857 T1f = TS - TT;
Chris@42 858 }
Chris@42 859 T1h = KP707106781 * (T1f + T1g);
Chris@42 860 T1i = T1e + T1h;
Chris@42 861 T3t = T1e - T1h;
Chris@42 862 T1j = KP707106781 * (T1g - T1f);
Chris@42 863 T1l = T1j - T1k;
Chris@42 864 T3u = T1k + T1j;
Chris@42 865 {
Chris@42 866 E TR, TY, T4t, T4u;
Chris@42 867 TR = TN + TQ;
Chris@42 868 TY = TU + TX;
Chris@42 869 TZ = TR + TY;
Chris@42 870 T63 = TR - TY;
Chris@42 871 T4t = TN - TQ;
Chris@42 872 T4u = TX - TU;
Chris@42 873 T4v = FNMS(KP382683432, T4u, KP923879532 * T4t);
Chris@42 874 T58 = FMA(KP382683432, T4t, KP923879532 * T4u);
Chris@42 875 }
Chris@42 876 }
Chris@42 877 {
Chris@42 878 E Ty, T1s, TI, T1n, TB, T1q, TF, T1o, T1p, T1t;
Chris@42 879 {
Chris@42 880 E Tw, Tx, TG, TH;
Chris@42 881 Tw = R0[WS(rs, 1)];
Chris@42 882 Tx = R0[WS(rs, 17)];
Chris@42 883 Ty = Tw + Tx;
Chris@42 884 T1s = Tw - Tx;
Chris@42 885 TG = R0[WS(rs, 29)];
Chris@42 886 TH = R0[WS(rs, 13)];
Chris@42 887 TI = TG + TH;
Chris@42 888 T1n = TG - TH;
Chris@42 889 }
Chris@42 890 {
Chris@42 891 E Tz, TA, TD, TE;
Chris@42 892 Tz = R0[WS(rs, 9)];
Chris@42 893 TA = R0[WS(rs, 25)];
Chris@42 894 TB = Tz + TA;
Chris@42 895 T1q = Tz - TA;
Chris@42 896 TD = R0[WS(rs, 5)];
Chris@42 897 TE = R0[WS(rs, 21)];
Chris@42 898 TF = TD + TE;
Chris@42 899 T1o = TD - TE;
Chris@42 900 }
Chris@42 901 T1p = KP707106781 * (T1n - T1o);
Chris@42 902 T1r = T1p - T1q;
Chris@42 903 T3r = T1q + T1p;
Chris@42 904 T1t = KP707106781 * (T1o + T1n);
Chris@42 905 T1u = T1s + T1t;
Chris@42 906 T3q = T1s - T1t;
Chris@42 907 {
Chris@42 908 E TC, TJ, T4q, T4r;
Chris@42 909 TC = Ty + TB;
Chris@42 910 TJ = TF + TI;
Chris@42 911 TK = TC + TJ;
Chris@42 912 T62 = TC - TJ;
Chris@42 913 T4q = Ty - TB;
Chris@42 914 T4r = TI - TF;
Chris@42 915 T4s = FMA(KP923879532, T4q, KP382683432 * T4r);
Chris@42 916 T57 = FNMS(KP382683432, T4q, KP923879532 * T4r);
Chris@42 917 }
Chris@42 918 }
Chris@42 919 {
Chris@42 920 E Ti, T16, Ts, T1a, Tl, T17, Tp, T19, T4m, T4n;
Chris@42 921 {
Chris@42 922 E Tg, Th, Tq, Tr;
Chris@42 923 Tg = R0[WS(rs, 2)];
Chris@42 924 Th = R0[WS(rs, 18)];
Chris@42 925 Ti = Tg + Th;
Chris@42 926 T16 = Tg - Th;
Chris@42 927 Tq = R0[WS(rs, 6)];
Chris@42 928 Tr = R0[WS(rs, 22)];
Chris@42 929 Ts = Tq + Tr;
Chris@42 930 T1a = Tq - Tr;
Chris@42 931 }
Chris@42 932 {
Chris@42 933 E Tj, Tk, Tn, To;
Chris@42 934 Tj = R0[WS(rs, 10)];
Chris@42 935 Tk = R0[WS(rs, 26)];
Chris@42 936 Tl = Tj + Tk;
Chris@42 937 T17 = Tj - Tk;
Chris@42 938 Tn = R0[WS(rs, 30)];
Chris@42 939 To = R0[WS(rs, 14)];
Chris@42 940 Tp = Tn + To;
Chris@42 941 T19 = Tn - To;
Chris@42 942 }
Chris@42 943 Tm = Ti + Tl;
Chris@42 944 Tt = Tp + Ts;
Chris@42 945 Tu = Tm + Tt;
Chris@42 946 T4m = Ti - Tl;
Chris@42 947 T4n = Tp - Ts;
Chris@42 948 T4o = KP707106781 * (T4m + T4n);
Chris@42 949 T5b = KP707106781 * (T4n - T4m);
Chris@42 950 {
Chris@42 951 E T18, T1b, T2O, T2P;
Chris@42 952 T18 = FNMS(KP382683432, T17, KP923879532 * T16);
Chris@42 953 T1b = FMA(KP923879532, T19, KP382683432 * T1a);
Chris@42 954 T1c = T18 + T1b;
Chris@42 955 T3R = T1b - T18;
Chris@42 956 T2O = FNMS(KP923879532, T1a, KP382683432 * T19);
Chris@42 957 T2P = FMA(KP382683432, T16, KP923879532 * T17);
Chris@42 958 T2Q = T2O - T2P;
Chris@42 959 T3o = T2P + T2O;
Chris@42 960 }
Chris@42 961 }
Chris@42 962 {
Chris@42 963 E T1A, T4E, T1K, T4C, T1D, T4F, T1H, T4B;
Chris@42 964 {
Chris@42 965 E T1y, T1z, T1I, T1J;
Chris@42 966 T1y = R1[WS(rs, 30)];
Chris@42 967 T1z = R1[WS(rs, 14)];
Chris@42 968 T1A = T1y - T1z;
Chris@42 969 T4E = T1y + T1z;
Chris@42 970 T1I = R1[WS(rs, 10)];
Chris@42 971 T1J = R1[WS(rs, 26)];
Chris@42 972 T1K = T1I - T1J;
Chris@42 973 T4C = T1I + T1J;
Chris@42 974 }
Chris@42 975 {
Chris@42 976 E T1B, T1C, T1F, T1G;
Chris@42 977 T1B = R1[WS(rs, 6)];
Chris@42 978 T1C = R1[WS(rs, 22)];
Chris@42 979 T1D = T1B - T1C;
Chris@42 980 T4F = T1B + T1C;
Chris@42 981 T1F = R1[WS(rs, 2)];
Chris@42 982 T1G = R1[WS(rs, 18)];
Chris@42 983 T1H = T1F - T1G;
Chris@42 984 T4B = T1F + T1G;
Chris@42 985 }
Chris@42 986 {
Chris@42 987 E T1E, T1L, T5J, T5K;
Chris@42 988 T1E = FNMS(KP923879532, T1D, KP382683432 * T1A);
Chris@42 989 T1L = FMA(KP382683432, T1H, KP923879532 * T1K);
Chris@42 990 T1M = T1E - T1L;
Chris@42 991 T3z = T1L + T1E;
Chris@42 992 T5J = T4B + T4C;
Chris@42 993 T5K = T4E + T4F;
Chris@42 994 T5L = T5J + T5K;
Chris@42 995 T67 = T5K - T5J;
Chris@42 996 }
Chris@42 997 {
Chris@42 998 E T24, T25, T4D, T4G;
Chris@42 999 T24 = FNMS(KP382683432, T1K, KP923879532 * T1H);
Chris@42 1000 T25 = FMA(KP923879532, T1A, KP382683432 * T1D);
Chris@42 1001 T26 = T24 + T25;
Chris@42 1002 T3C = T25 - T24;
Chris@42 1003 T4D = T4B - T4C;
Chris@42 1004 T4G = T4E - T4F;
Chris@42 1005 T4H = KP707106781 * (T4D + T4G);
Chris@42 1006 T4M = KP707106781 * (T4G - T4D);
Chris@42 1007 }
Chris@42 1008 }
Chris@42 1009 {
Chris@42 1010 E T2m, T4S, T2w, T4W, T2p, T4T, T2t, T4V;
Chris@42 1011 {
Chris@42 1012 E T2k, T2l, T2u, T2v;
Chris@42 1013 T2k = R1[WS(rs, 1)];
Chris@42 1014 T2l = R1[WS(rs, 17)];
Chris@42 1015 T2m = T2k - T2l;
Chris@42 1016 T4S = T2k + T2l;
Chris@42 1017 T2u = R1[WS(rs, 5)];
Chris@42 1018 T2v = R1[WS(rs, 21)];
Chris@42 1019 T2w = T2u - T2v;
Chris@42 1020 T4W = T2u + T2v;
Chris@42 1021 }
Chris@42 1022 {
Chris@42 1023 E T2n, T2o, T2r, T2s;
Chris@42 1024 T2n = R1[WS(rs, 9)];
Chris@42 1025 T2o = R1[WS(rs, 25)];
Chris@42 1026 T2p = T2n - T2o;
Chris@42 1027 T4T = T2n + T2o;
Chris@42 1028 T2r = R1[WS(rs, 29)];
Chris@42 1029 T2s = R1[WS(rs, 13)];
Chris@42 1030 T2t = T2r - T2s;
Chris@42 1031 T4V = T2r + T2s;
Chris@42 1032 }
Chris@42 1033 {
Chris@42 1034 E T2q, T2x, T5Q, T5R;
Chris@42 1035 T2q = FNMS(KP382683432, T2p, KP923879532 * T2m);
Chris@42 1036 T2x = FMA(KP923879532, T2t, KP382683432 * T2w);
Chris@42 1037 T2y = T2q + T2x;
Chris@42 1038 T3J = T2x - T2q;
Chris@42 1039 T5Q = T4S + T4T;
Chris@42 1040 T5R = T4V + T4W;
Chris@42 1041 T5S = T5Q + T5R;
Chris@42 1042 T6a = T5R - T5Q;
Chris@42 1043 }
Chris@42 1044 {
Chris@42 1045 E T2A, T2B, T4U, T4X;
Chris@42 1046 T2A = FNMS(KP923879532, T2w, KP382683432 * T2t);
Chris@42 1047 T2B = FMA(KP382683432, T2m, KP923879532 * T2p);
Chris@42 1048 T2C = T2A - T2B;
Chris@42 1049 T3G = T2B + T2A;
Chris@42 1050 T4U = T4S - T4T;
Chris@42 1051 T4X = T4V - T4W;
Chris@42 1052 T4Y = KP707106781 * (T4U + T4X);
Chris@42 1053 T53 = KP707106781 * (T4X - T4U);
Chris@42 1054 }
Chris@42 1055 }
Chris@42 1056 {
Chris@42 1057 E Tv, T10, T5X, T5Y, T5Z, T60;
Chris@42 1058 Tv = Tf + Tu;
Chris@42 1059 T10 = TK + TZ;
Chris@42 1060 T5X = Tv + T10;
Chris@42 1061 T5Y = T5I + T5L;
Chris@42 1062 T5Z = T5P + T5S;
Chris@42 1063 T60 = T5Y + T5Z;
Chris@42 1064 Cr[WS(csr, 16)] = Tv - T10;
Chris@42 1065 Ci[WS(csi, 16)] = T5Z - T5Y;
Chris@42 1066 Cr[WS(csr, 32)] = T5X - T60;
Chris@42 1067 Cr[0] = T5X + T60;
Chris@42 1068 }
Chris@42 1069 {
Chris@42 1070 E T5F, T5V, T5U, T5W, T5M, T5T;
Chris@42 1071 T5F = Tf - Tu;
Chris@42 1072 T5V = TZ - TK;
Chris@42 1073 T5M = T5I - T5L;
Chris@42 1074 T5T = T5P - T5S;
Chris@42 1075 T5U = KP707106781 * (T5M + T5T);
Chris@42 1076 T5W = KP707106781 * (T5T - T5M);
Chris@42 1077 Cr[WS(csr, 24)] = T5F - T5U;
Chris@42 1078 Ci[WS(csi, 24)] = T5W - T5V;
Chris@42 1079 Cr[WS(csr, 8)] = T5F + T5U;
Chris@42 1080 Ci[WS(csi, 8)] = T5V + T5W;
Chris@42 1081 }
Chris@42 1082 {
Chris@42 1083 E T65, T6l, T6k, T6m, T6c, T6g, T6f, T6h;
Chris@42 1084 {
Chris@42 1085 E T61, T64, T6i, T6j;
Chris@42 1086 T61 = T7 - Te;
Chris@42 1087 T64 = KP707106781 * (T62 + T63);
Chris@42 1088 T65 = T61 + T64;
Chris@42 1089 T6l = T61 - T64;
Chris@42 1090 T6i = FNMS(KP382683432, T66, KP923879532 * T67);
Chris@42 1091 T6j = FMA(KP382683432, T69, KP923879532 * T6a);
Chris@42 1092 T6k = T6i + T6j;
Chris@42 1093 T6m = T6j - T6i;
Chris@42 1094 }
Chris@42 1095 {
Chris@42 1096 E T68, T6b, T6d, T6e;
Chris@42 1097 T68 = FMA(KP923879532, T66, KP382683432 * T67);
Chris@42 1098 T6b = FNMS(KP382683432, T6a, KP923879532 * T69);
Chris@42 1099 T6c = T68 + T6b;
Chris@42 1100 T6g = T6b - T68;
Chris@42 1101 T6d = KP707106781 * (T63 - T62);
Chris@42 1102 T6e = Tt - Tm;
Chris@42 1103 T6f = T6d - T6e;
Chris@42 1104 T6h = T6e + T6d;
Chris@42 1105 }
Chris@42 1106 Cr[WS(csr, 28)] = T65 - T6c;
Chris@42 1107 Ci[WS(csi, 28)] = T6k - T6h;
Chris@42 1108 Cr[WS(csr, 4)] = T65 + T6c;
Chris@42 1109 Ci[WS(csi, 4)] = T6h + T6k;
Chris@42 1110 Ci[WS(csi, 12)] = T6f + T6g;
Chris@42 1111 Cr[WS(csr, 12)] = T6l + T6m;
Chris@42 1112 Ci[WS(csi, 20)] = T6g - T6f;
Chris@42 1113 Cr[WS(csr, 20)] = T6l - T6m;
Chris@42 1114 }
Chris@42 1115 {
Chris@42 1116 E T5n, T5D, T5x, T5z, T5q, T5A, T5t, T5B;
Chris@42 1117 {
Chris@42 1118 E T5l, T5m, T5v, T5w;
Chris@42 1119 T5l = T4l - T4o;
Chris@42 1120 T5m = T58 - T57;
Chris@42 1121 T5n = T5l + T5m;
Chris@42 1122 T5D = T5l - T5m;
Chris@42 1123 T5v = T4v - T4s;
Chris@42 1124 T5w = T5b - T5a;
Chris@42 1125 T5x = T5v - T5w;
Chris@42 1126 T5z = T5w + T5v;
Chris@42 1127 }
Chris@42 1128 {
Chris@42 1129 E T5o, T5p, T5r, T5s;
Chris@42 1130 T5o = T4A - T4H;
Chris@42 1131 T5p = T4M - T4L;
Chris@42 1132 T5q = FMA(KP831469612, T5o, KP555570233 * T5p);
Chris@42 1133 T5A = FNMS(KP555570233, T5o, KP831469612 * T5p);
Chris@42 1134 T5r = T4R - T4Y;
Chris@42 1135 T5s = T53 - T52;
Chris@42 1136 T5t = FNMS(KP555570233, T5s, KP831469612 * T5r);
Chris@42 1137 T5B = FMA(KP555570233, T5r, KP831469612 * T5s);
Chris@42 1138 }
Chris@42 1139 {
Chris@42 1140 E T5u, T5C, T5y, T5E;
Chris@42 1141 T5u = T5q + T5t;
Chris@42 1142 Cr[WS(csr, 26)] = T5n - T5u;
Chris@42 1143 Cr[WS(csr, 6)] = T5n + T5u;
Chris@42 1144 T5C = T5A + T5B;
Chris@42 1145 Ci[WS(csi, 6)] = T5z + T5C;
Chris@42 1146 Ci[WS(csi, 26)] = T5C - T5z;
Chris@42 1147 T5y = T5t - T5q;
Chris@42 1148 Ci[WS(csi, 10)] = T5x + T5y;
Chris@42 1149 Ci[WS(csi, 22)] = T5y - T5x;
Chris@42 1150 T5E = T5B - T5A;
Chris@42 1151 Cr[WS(csr, 22)] = T5D - T5E;
Chris@42 1152 Cr[WS(csr, 10)] = T5D + T5E;
Chris@42 1153 }
Chris@42 1154 }
Chris@42 1155 {
Chris@42 1156 E T4x, T5j, T5d, T5f, T4O, T5g, T55, T5h;
Chris@42 1157 {
Chris@42 1158 E T4p, T4w, T59, T5c;
Chris@42 1159 T4p = T4l + T4o;
Chris@42 1160 T4w = T4s + T4v;
Chris@42 1161 T4x = T4p + T4w;
Chris@42 1162 T5j = T4p - T4w;
Chris@42 1163 T59 = T57 + T58;
Chris@42 1164 T5c = T5a + T5b;
Chris@42 1165 T5d = T59 - T5c;
Chris@42 1166 T5f = T5c + T59;
Chris@42 1167 }
Chris@42 1168 {
Chris@42 1169 E T4I, T4N, T4Z, T54;
Chris@42 1170 T4I = T4A + T4H;
Chris@42 1171 T4N = T4L + T4M;
Chris@42 1172 T4O = FMA(KP980785280, T4I, KP195090322 * T4N);
Chris@42 1173 T5g = FNMS(KP195090322, T4I, KP980785280 * T4N);
Chris@42 1174 T4Z = T4R + T4Y;
Chris@42 1175 T54 = T52 + T53;
Chris@42 1176 T55 = FNMS(KP195090322, T54, KP980785280 * T4Z);
Chris@42 1177 T5h = FMA(KP195090322, T4Z, KP980785280 * T54);
Chris@42 1178 }
Chris@42 1179 {
Chris@42 1180 E T56, T5i, T5e, T5k;
Chris@42 1181 T56 = T4O + T55;
Chris@42 1182 Cr[WS(csr, 30)] = T4x - T56;
Chris@42 1183 Cr[WS(csr, 2)] = T4x + T56;
Chris@42 1184 T5i = T5g + T5h;
Chris@42 1185 Ci[WS(csi, 2)] = T5f + T5i;
Chris@42 1186 Ci[WS(csi, 30)] = T5i - T5f;
Chris@42 1187 T5e = T55 - T4O;
Chris@42 1188 Ci[WS(csi, 14)] = T5d + T5e;
Chris@42 1189 Ci[WS(csi, 18)] = T5e - T5d;
Chris@42 1190 T5k = T5h - T5g;
Chris@42 1191 Cr[WS(csr, 18)] = T5j - T5k;
Chris@42 1192 Cr[WS(csr, 14)] = T5j + T5k;
Chris@42 1193 }
Chris@42 1194 }
Chris@42 1195 {
Chris@42 1196 E T3p, T41, T4c, T3S, T3w, T4b, T49, T4h, T3P, T42, T3E, T3W, T46, T4g, T3L;
Chris@42 1197 E T3X;
Chris@42 1198 {
Chris@42 1199 E T3s, T3v, T3A, T3D;
Chris@42 1200 T3p = T3n + T3o;
Chris@42 1201 T41 = T3n - T3o;
Chris@42 1202 T4c = T3R - T3Q;
Chris@42 1203 T3S = T3Q + T3R;
Chris@42 1204 T3s = FMA(KP831469612, T3q, KP555570233 * T3r);
Chris@42 1205 T3v = FNMS(KP555570233, T3u, KP831469612 * T3t);
Chris@42 1206 T3w = T3s + T3v;
Chris@42 1207 T4b = T3v - T3s;
Chris@42 1208 {
Chris@42 1209 E T47, T48, T3N, T3O;
Chris@42 1210 T47 = T3F - T3G;
Chris@42 1211 T48 = T3J - T3I;
Chris@42 1212 T49 = FNMS(KP471396736, T48, KP881921264 * T47);
Chris@42 1213 T4h = FMA(KP471396736, T47, KP881921264 * T48);
Chris@42 1214 T3N = FNMS(KP555570233, T3q, KP831469612 * T3r);
Chris@42 1215 T3O = FMA(KP555570233, T3t, KP831469612 * T3u);
Chris@42 1216 T3P = T3N + T3O;
Chris@42 1217 T42 = T3O - T3N;
Chris@42 1218 }
Chris@42 1219 T3A = T3y + T3z;
Chris@42 1220 T3D = T3B + T3C;
Chris@42 1221 T3E = FMA(KP956940335, T3A, KP290284677 * T3D);
Chris@42 1222 T3W = FNMS(KP290284677, T3A, KP956940335 * T3D);
Chris@42 1223 {
Chris@42 1224 E T44, T45, T3H, T3K;
Chris@42 1225 T44 = T3y - T3z;
Chris@42 1226 T45 = T3C - T3B;
Chris@42 1227 T46 = FMA(KP881921264, T44, KP471396736 * T45);
Chris@42 1228 T4g = FNMS(KP471396736, T44, KP881921264 * T45);
Chris@42 1229 T3H = T3F + T3G;
Chris@42 1230 T3K = T3I + T3J;
Chris@42 1231 T3L = FNMS(KP290284677, T3K, KP956940335 * T3H);
Chris@42 1232 T3X = FMA(KP290284677, T3H, KP956940335 * T3K);
Chris@42 1233 }
Chris@42 1234 }
Chris@42 1235 {
Chris@42 1236 E T3x, T3M, T3V, T3Y;
Chris@42 1237 T3x = T3p + T3w;
Chris@42 1238 T3M = T3E + T3L;
Chris@42 1239 Cr[WS(csr, 29)] = T3x - T3M;
Chris@42 1240 Cr[WS(csr, 3)] = T3x + T3M;
Chris@42 1241 T3V = T3S + T3P;
Chris@42 1242 T3Y = T3W + T3X;
Chris@42 1243 Ci[WS(csi, 3)] = T3V + T3Y;
Chris@42 1244 Ci[WS(csi, 29)] = T3Y - T3V;
Chris@42 1245 }
Chris@42 1246 {
Chris@42 1247 E T3T, T3U, T3Z, T40;
Chris@42 1248 T3T = T3P - T3S;
Chris@42 1249 T3U = T3L - T3E;
Chris@42 1250 Ci[WS(csi, 13)] = T3T + T3U;
Chris@42 1251 Ci[WS(csi, 19)] = T3U - T3T;
Chris@42 1252 T3Z = T3p - T3w;
Chris@42 1253 T40 = T3X - T3W;
Chris@42 1254 Cr[WS(csr, 19)] = T3Z - T40;
Chris@42 1255 Cr[WS(csr, 13)] = T3Z + T40;
Chris@42 1256 }
Chris@42 1257 {
Chris@42 1258 E T43, T4a, T4f, T4i;
Chris@42 1259 T43 = T41 + T42;
Chris@42 1260 T4a = T46 + T49;
Chris@42 1261 Cr[WS(csr, 27)] = T43 - T4a;
Chris@42 1262 Cr[WS(csr, 5)] = T43 + T4a;
Chris@42 1263 T4f = T4c + T4b;
Chris@42 1264 T4i = T4g + T4h;
Chris@42 1265 Ci[WS(csi, 5)] = T4f + T4i;
Chris@42 1266 Ci[WS(csi, 27)] = T4i - T4f;
Chris@42 1267 }
Chris@42 1268 {
Chris@42 1269 E T4d, T4e, T4j, T4k;
Chris@42 1270 T4d = T4b - T4c;
Chris@42 1271 T4e = T49 - T46;
Chris@42 1272 Ci[WS(csi, 11)] = T4d + T4e;
Chris@42 1273 Ci[WS(csi, 21)] = T4e - T4d;
Chris@42 1274 T4j = T41 - T42;
Chris@42 1275 T4k = T4h - T4g;
Chris@42 1276 Cr[WS(csr, 21)] = T4j - T4k;
Chris@42 1277 Cr[WS(csr, 11)] = T4j + T4k;
Chris@42 1278 }
Chris@42 1279 }
Chris@42 1280 {
Chris@42 1281 E T1d, T33, T3e, T2U, T1w, T3d, T3b, T3j, T2N, T34, T28, T2Y, T38, T3i, T2J;
Chris@42 1282 E T2Z;
Chris@42 1283 {
Chris@42 1284 E T1m, T1v, T1Y, T27;
Chris@42 1285 T1d = T15 - T1c;
Chris@42 1286 T33 = T15 + T1c;
Chris@42 1287 T3e = T2T + T2Q;
Chris@42 1288 T2U = T2Q - T2T;
Chris@42 1289 T1m = FMA(KP195090322, T1i, KP980785280 * T1l);
Chris@42 1290 T1v = FNMS(KP195090322, T1u, KP980785280 * T1r);
Chris@42 1291 T1w = T1m - T1v;
Chris@42 1292 T3d = T1v + T1m;
Chris@42 1293 {
Chris@42 1294 E T39, T3a, T2L, T2M;
Chris@42 1295 T39 = T2j + T2y;
Chris@42 1296 T3a = T2H + T2C;
Chris@42 1297 T3b = FNMS(KP098017140, T3a, KP995184726 * T39);
Chris@42 1298 T3j = FMA(KP995184726, T3a, KP098017140 * T39);
Chris@42 1299 T2L = FNMS(KP195090322, T1l, KP980785280 * T1i);
Chris@42 1300 T2M = FMA(KP980785280, T1u, KP195090322 * T1r);
Chris@42 1301 T2N = T2L - T2M;
Chris@42 1302 T34 = T2M + T2L;
Chris@42 1303 }
Chris@42 1304 T1Y = T1M - T1X;
Chris@42 1305 T27 = T23 - T26;
Chris@42 1306 T28 = FMA(KP634393284, T1Y, KP773010453 * T27);
Chris@42 1307 T2Y = FNMS(KP634393284, T27, KP773010453 * T1Y);
Chris@42 1308 {
Chris@42 1309 E T36, T37, T2z, T2I;
Chris@42 1310 T36 = T1X + T1M;
Chris@42 1311 T37 = T23 + T26;
Chris@42 1312 T38 = FMA(KP098017140, T36, KP995184726 * T37);
Chris@42 1313 T3i = FNMS(KP098017140, T37, KP995184726 * T36);
Chris@42 1314 T2z = T2j - T2y;
Chris@42 1315 T2I = T2C - T2H;
Chris@42 1316 T2J = FNMS(KP634393284, T2I, KP773010453 * T2z);
Chris@42 1317 T2Z = FMA(KP773010453, T2I, KP634393284 * T2z);
Chris@42 1318 }
Chris@42 1319 }
Chris@42 1320 {
Chris@42 1321 E T1x, T2K, T2X, T30;
Chris@42 1322 T1x = T1d + T1w;
Chris@42 1323 T2K = T28 + T2J;
Chris@42 1324 Cr[WS(csr, 25)] = T1x - T2K;
Chris@42 1325 Cr[WS(csr, 7)] = T1x + T2K;
Chris@42 1326 T2X = T2U + T2N;
Chris@42 1327 T30 = T2Y + T2Z;
Chris@42 1328 Ci[WS(csi, 7)] = T2X + T30;
Chris@42 1329 Ci[WS(csi, 25)] = T30 - T2X;
Chris@42 1330 }
Chris@42 1331 {
Chris@42 1332 E T2V, T2W, T31, T32;
Chris@42 1333 T2V = T2N - T2U;
Chris@42 1334 T2W = T2J - T28;
Chris@42 1335 Ci[WS(csi, 9)] = T2V + T2W;
Chris@42 1336 Ci[WS(csi, 23)] = T2W - T2V;
Chris@42 1337 T31 = T1d - T1w;
Chris@42 1338 T32 = T2Z - T2Y;
Chris@42 1339 Cr[WS(csr, 23)] = T31 - T32;
Chris@42 1340 Cr[WS(csr, 9)] = T31 + T32;
Chris@42 1341 }
Chris@42 1342 {
Chris@42 1343 E T35, T3c, T3h, T3k;
Chris@42 1344 T35 = T33 + T34;
Chris@42 1345 T3c = T38 + T3b;
Chris@42 1346 Cr[WS(csr, 31)] = T35 - T3c;
Chris@42 1347 Cr[WS(csr, 1)] = T35 + T3c;
Chris@42 1348 T3h = T3e + T3d;
Chris@42 1349 T3k = T3i + T3j;
Chris@42 1350 Ci[WS(csi, 1)] = T3h + T3k;
Chris@42 1351 Ci[WS(csi, 31)] = T3k - T3h;
Chris@42 1352 }
Chris@42 1353 {
Chris@42 1354 E T3f, T3g, T3l, T3m;
Chris@42 1355 T3f = T3d - T3e;
Chris@42 1356 T3g = T3b - T38;
Chris@42 1357 Ci[WS(csi, 15)] = T3f + T3g;
Chris@42 1358 Ci[WS(csi, 17)] = T3g - T3f;
Chris@42 1359 T3l = T33 - T34;
Chris@42 1360 T3m = T3j - T3i;
Chris@42 1361 Cr[WS(csr, 17)] = T3l - T3m;
Chris@42 1362 Cr[WS(csr, 15)] = T3l + T3m;
Chris@42 1363 }
Chris@42 1364 }
Chris@42 1365 }
Chris@42 1366 }
Chris@42 1367 }
Chris@42 1368
/* Descriptor for this codelet; its address is passed to X(kr2c_register)
 * by the registration function at the end of this file.
 * Positional fields as written here: 64 (matches the "-n 64" generator option),
 * the codelet name "r2cf_64", a four-entry brace group, and the address of GENUS.
 * NOTE(review): the {342, 72, 52, 0} group is presumably the operation count
 * (additions, multiplications, fused multiply/adds, other) -- confirm against
 * the kr2c_desc declaration, which is not visible in this file. */
Chris@42 1369 static const kr2c_desc desc = { 64, "r2cf_64", {342, 72, 52, 0}, &GENUS };
Chris@42 1370
/* Registration entry point for this codelet.
 * Forwards the planner `p`, the r2cf_64 kernel function and a pointer to the
 * file-local descriptor `desc` to X(kr2c_register). Returns nothing.
 * NOTE(review): X(...) is a project macro applied to both identifiers here;
 * presumably it applies the library's symbol-name prefix -- confirm in the
 * headers pulled in by codelet-rdft.h. */
Chris@42 1371 void X(codelet_r2cf_64) (planner *p) {
Chris@42 1372 X(kr2c_register) (p, r2cf_64, &desc);
Chris@42 1373 }
Chris@42 1374
Chris@42 1375 #endif /* HAVE_FMA */