annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:27 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 394 FP additions, 196 FP multiplications,
Chris@82 32 * (or, 198 additions, 0 multiplications, 196 fused multiply/add),
Chris@82 33 * 106 stack variables, 15 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
/*
 * r2cf_64 -- FMA variant, compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.  Branch-free kernel emitted by
 * genfft's gen_r2cf with "-n 64"; regenerate rather than hand-edit.
 *
 * Each loop iteration reads 64 inputs -- R0[WS(rs, k)] and R1[WS(rs, k)]
 * for k = 0..31 -- and stores 64 outputs: Cr[WS(csr, k)] for k = 0..32 and
 * Ci[WS(csi, k)] for k = 1..31.
 *
 * Parameters:
 *   R0, R1        input arrays, indexed through WS(rs, k)
 *   Cr, Ci        output arrays, indexed through WS(csr, k) / WS(csi, k)
 *   rs, csr, csi  strides handed to WS() for the input / Cr / Ci accesses
 *   v             number of loop iterations to run
 *   ivs, ovs      added to R0 and R1 (ivs) and to Cr and Ci (ovs) after
 *                 every iteration
 *
 * NOTE(review): going by FFTW naming this is presumably the size-64
 * real-to-complex forward transform, with R0/R1 holding even/odd input
 * samples and Cr/Ci the real/imaginary parts of the half spectrum --
 * confirm against rdft/codelet-rdft.h.  E, R, INT, stride, DK, WS, FMA,
 * FNMS, FMS and MAKE_VOLATILE_STRIDE are all declared outside this file.
 */
Chris@82 37 static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Named constants; each KPnnnnnnnnn identifier spells the first nine decimal digits of its value. */
Chris@82 39 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@82 40 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@82 41 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@82 42 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@82 43 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@82 44 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@82 45 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@82 46 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@82 47 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 48 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 49 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 50 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 51 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 52 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 53 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 54 {
Chris@82 55 INT i;
/* i counts down from v to 1; the increment clause advances R0/R1 by ivs and Cr/Ci by ovs, then invokes MAKE_VOLATILE_STRIDE on each stride. */
Chris@82 56 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* Intermediates assigned by the eight input-gathering blocks below and consumed by the output blocks that follow them. */
Chris@82 57 E T11, T2j, T3D, T5p, T4P, T5P, T7, Te, Tf, T1k, T1H, T4a, T5A, T4l, T5D;
Chris@82 58 E T2U, T3i, T1R, T2e, T4v, T5H, T4G, T5K, T31, T3l, T3Z, T5t, T42, T5s, TZ;
Chris@82 59 E T3f, T1b, T2n, T3Q, T5w, T3T, T5v, TK, T3e, T18, T2m, Tm, Tt, Tu, T4S;
Chris@82 60 E T5q, T14, T2k, T3K, T5Q, T1z, T1I, T4o, T5B, T2X, T3j, T4h, T5E, T26, T2f;
Chris@82 61 E T4J, T5I, T34, T3m, T4C, T5L;
/* Input block: R0[WS(rs, k)] for k = 0, 16, 28, 12, 8, 24, 4, 20. */
Chris@82 62 {
Chris@82 63 E T3, T3z, Td, T3B, T6, T4N, Ta, T3A, T3C, T4O;
Chris@82 64 {
Chris@82 65 E T1, T2, Tb, Tc;
Chris@82 66 T1 = R0[0];
Chris@82 67 T2 = R0[WS(rs, 16)];
Chris@82 68 T3 = T1 + T2;
Chris@82 69 T3z = T1 - T2;
Chris@82 70 Tb = R0[WS(rs, 28)];
Chris@82 71 Tc = R0[WS(rs, 12)];
Chris@82 72 Td = Tb + Tc;
Chris@82 73 T3B = Tb - Tc;
Chris@82 74 }
Chris@82 75 {
Chris@82 76 E T4, T5, T8, T9;
Chris@82 77 T4 = R0[WS(rs, 8)];
Chris@82 78 T5 = R0[WS(rs, 24)];
Chris@82 79 T6 = T4 + T5;
Chris@82 80 T4N = T4 - T5;
Chris@82 81 T8 = R0[WS(rs, 4)];
Chris@82 82 T9 = R0[WS(rs, 20)];
Chris@82 83 Ta = T8 + T9;
Chris@82 84 T3A = T8 - T9;
Chris@82 85 }
Chris@82 86 T11 = T3 - T6;
Chris@82 87 T2j = Td - Ta;
Chris@82 88 T3C = T3A + T3B;
Chris@82 89 T3D = FMA(KP707106781, T3C, T3z);
Chris@82 90 T5p = FNMS(KP707106781, T3C, T3z);
Chris@82 91 T4O = T3B - T3A;
Chris@82 92 T4P = FNMS(KP707106781, T4O, T4N);
Chris@82 93 T5P = FMA(KP707106781, T4O, T4N);
Chris@82 94 T7 = T3 + T6;
Chris@82 95 Te = Ta + Td;
Chris@82 96 Tf = T7 + Te;
Chris@82 97 }
/* Input block: R1[WS(rs, k)] for k = 0, 16, 4, 20, 8, 24, 28, 12. */
Chris@82 98 {
Chris@82 99 E T1g, T46, T1G, T47, T1j, T4j, T1D, T48;
Chris@82 100 {
Chris@82 101 E T1e, T1f, T1E, T1F;
Chris@82 102 T1e = R1[0];
Chris@82 103 T1f = R1[WS(rs, 16)];
Chris@82 104 T1g = T1e + T1f;
Chris@82 105 T46 = T1e - T1f;
Chris@82 106 T1E = R1[WS(rs, 4)];
Chris@82 107 T1F = R1[WS(rs, 20)];
Chris@82 108 T1G = T1E + T1F;
Chris@82 109 T47 = T1E - T1F;
Chris@82 110 }
Chris@82 111 {
Chris@82 112 E T1h, T1i, T1B, T1C;
Chris@82 113 T1h = R1[WS(rs, 8)];
Chris@82 114 T1i = R1[WS(rs, 24)];
Chris@82 115 T1j = T1h + T1i;
Chris@82 116 T4j = T1h - T1i;
Chris@82 117 T1B = R1[WS(rs, 28)];
Chris@82 118 T1C = R1[WS(rs, 12)];
Chris@82 119 T1D = T1B + T1C;
Chris@82 120 T48 = T1B - T1C;
Chris@82 121 }
Chris@82 122 T1k = T1g - T1j;
Chris@82 123 T1H = T1D - T1G;
Chris@82 124 {
Chris@82 125 E T49, T4k, T2S, T2T;
Chris@82 126 T49 = T47 + T48;
Chris@82 127 T4a = FMA(KP707106781, T49, T46);
Chris@82 128 T5A = FNMS(KP707106781, T49, T46);
Chris@82 129 T4k = T47 - T48;
Chris@82 130 T4l = FMA(KP707106781, T4k, T4j);
Chris@82 131 T5D = FNMS(KP707106781, T4k, T4j);
Chris@82 132 T2S = T1g + T1j;
Chris@82 133 T2T = T1G + T1D;
Chris@82 134 T2U = T2S + T2T;
Chris@82 135 T3i = T2S - T2T;
Chris@82 136 }
Chris@82 137 }
/* Input block: R1[WS(rs, k)] for k = 31, 15, 3, 19, 7, 23, 27, 11. */
Chris@82 138 {
Chris@82 139 E T1N, T4r, T2d, T4s, T1Q, T4E, T2a, T4t;
Chris@82 140 {
Chris@82 141 E T1L, T1M, T2b, T2c;
Chris@82 142 T1L = R1[WS(rs, 31)];
Chris@82 143 T1M = R1[WS(rs, 15)];
Chris@82 144 T1N = T1L + T1M;
Chris@82 145 T4r = T1L - T1M;
Chris@82 146 T2b = R1[WS(rs, 3)];
Chris@82 147 T2c = R1[WS(rs, 19)];
Chris@82 148 T2d = T2b + T2c;
Chris@82 149 T4s = T2b - T2c;
Chris@82 150 }
Chris@82 151 {
Chris@82 152 E T1O, T1P, T28, T29;
Chris@82 153 T1O = R1[WS(rs, 7)];
Chris@82 154 T1P = R1[WS(rs, 23)];
Chris@82 155 T1Q = T1O + T1P;
Chris@82 156 T4E = T1P - T1O;
Chris@82 157 T28 = R1[WS(rs, 27)];
Chris@82 158 T29 = R1[WS(rs, 11)];
Chris@82 159 T2a = T28 + T29;
Chris@82 160 T4t = T28 - T29;
Chris@82 161 }
Chris@82 162 T1R = T1N - T1Q;
Chris@82 163 T2e = T2a - T2d;
Chris@82 164 {
Chris@82 165 E T4u, T4F, T2Z, T30;
Chris@82 166 T4u = T4s + T4t;
Chris@82 167 T4v = FMA(KP707106781, T4u, T4r);
Chris@82 168 T5H = FNMS(KP707106781, T4u, T4r);
Chris@82 169 T4F = T4t - T4s;
Chris@82 170 T4G = FMA(KP707106781, T4F, T4E);
Chris@82 171 T5K = FNMS(KP707106781, T4F, T4E);
Chris@82 172 T2Z = T1N + T1Q;
Chris@82 173 T30 = T2d + T2a;
Chris@82 174 T31 = T2Z + T30;
Chris@82 175 T3l = T2Z - T30;
Chris@82 176 }
Chris@82 177 }
/* Input block: R0[WS(rs, k)] for k = 31, 15, 27, 11, 7, 23, 3, 19. */
Chris@82 178 {
Chris@82 179 E TN, T3V, TX, T3X, TQ, T40, TU, T3W, T3Y, T41;
Chris@82 180 {
Chris@82 181 E TL, TM, TV, TW;
Chris@82 182 TL = R0[WS(rs, 31)];
Chris@82 183 TM = R0[WS(rs, 15)];
Chris@82 184 TN = TL + TM;
Chris@82 185 T3V = TL - TM;
Chris@82 186 TV = R0[WS(rs, 27)];
Chris@82 187 TW = R0[WS(rs, 11)];
Chris@82 188 TX = TV + TW;
Chris@82 189 T3X = TV - TW;
Chris@82 190 }
Chris@82 191 {
Chris@82 192 E TO, TP, TS, TT;
Chris@82 193 TO = R0[WS(rs, 7)];
Chris@82 194 TP = R0[WS(rs, 23)];
Chris@82 195 TQ = TO + TP;
Chris@82 196 T40 = TO - TP;
Chris@82 197 TS = R0[WS(rs, 3)];
Chris@82 198 TT = R0[WS(rs, 19)];
Chris@82 199 TU = TS + TT;
Chris@82 200 T3W = TS - TT;
Chris@82 201 }
Chris@82 202 T3Y = T3W + T3X;
Chris@82 203 T3Z = FMA(KP707106781, T3Y, T3V);
Chris@82 204 T5t = FNMS(KP707106781, T3Y, T3V);
Chris@82 205 T41 = T3W - T3X;
Chris@82 206 T42 = FMA(KP707106781, T41, T40);
Chris@82 207 T5s = FNMS(KP707106781, T41, T40);
Chris@82 208 {
Chris@82 209 E TR, TY, T19, T1a;
Chris@82 210 TR = TN + TQ;
Chris@82 211 TY = TU + TX;
Chris@82 212 TZ = TR + TY;
Chris@82 213 T3f = TR - TY;
Chris@82 214 T19 = TN - TQ;
Chris@82 215 T1a = TX - TU;
Chris@82 216 T1b = FNMS(KP414213562, T1a, T19);
Chris@82 217 T2n = FMA(KP414213562, T19, T1a);
Chris@82 218 }
Chris@82 219 }
/* Input block: R0[WS(rs, k)] for k = 1, 17, 29, 13, 9, 25, 5, 21. */
Chris@82 220 {
Chris@82 221 E Ty, T3M, TI, T3O, TB, T3R, TF, T3N, T3P, T3S;
Chris@82 222 {
Chris@82 223 E Tw, Tx, TG, TH;
Chris@82 224 Tw = R0[WS(rs, 1)];
Chris@82 225 Tx = R0[WS(rs, 17)];
Chris@82 226 Ty = Tw + Tx;
Chris@82 227 T3M = Tw - Tx;
Chris@82 228 TG = R0[WS(rs, 29)];
Chris@82 229 TH = R0[WS(rs, 13)];
Chris@82 230 TI = TG + TH;
Chris@82 231 T3O = TG - TH;
Chris@82 232 }
Chris@82 233 {
Chris@82 234 E Tz, TA, TD, TE;
Chris@82 235 Tz = R0[WS(rs, 9)];
Chris@82 236 TA = R0[WS(rs, 25)];
Chris@82 237 TB = Tz + TA;
Chris@82 238 T3R = Tz - TA;
Chris@82 239 TD = R0[WS(rs, 5)];
Chris@82 240 TE = R0[WS(rs, 21)];
Chris@82 241 TF = TD + TE;
Chris@82 242 T3N = TD - TE;
Chris@82 243 }
Chris@82 244 T3P = T3N + T3O;
Chris@82 245 T3Q = FMA(KP707106781, T3P, T3M);
Chris@82 246 T5w = FNMS(KP707106781, T3P, T3M);
Chris@82 247 T3S = T3N - T3O;
Chris@82 248 T3T = FMA(KP707106781, T3S, T3R);
Chris@82 249 T5v = FNMS(KP707106781, T3S, T3R);
Chris@82 250 {
Chris@82 251 E TC, TJ, T16, T17;
Chris@82 252 TC = Ty + TB;
Chris@82 253 TJ = TF + TI;
Chris@82 254 TK = TC + TJ;
Chris@82 255 T3e = TC - TJ;
Chris@82 256 T16 = Ty - TB;
Chris@82 257 T17 = TI - TF;
Chris@82 258 T18 = FMA(KP414213562, T17, T16);
Chris@82 259 T2m = FNMS(KP414213562, T16, T17);
Chris@82 260 }
Chris@82 261 }
/* Input block: R0[WS(rs, k)] for k = 2, 18, 6, 22, 10, 26, 30, 14. */
Chris@82 262 {
Chris@82 263 E Ti, T3E, Ts, T3I, Tl, T3F, Tp, T3H, T4Q, T4R;
Chris@82 264 {
Chris@82 265 E Tg, Th, Tq, Tr;
Chris@82 266 Tg = R0[WS(rs, 2)];
Chris@82 267 Th = R0[WS(rs, 18)];
Chris@82 268 Ti = Tg + Th;
Chris@82 269 T3E = Tg - Th;
Chris@82 270 Tq = R0[WS(rs, 6)];
Chris@82 271 Tr = R0[WS(rs, 22)];
Chris@82 272 Ts = Tq + Tr;
Chris@82 273 T3I = Tq - Tr;
Chris@82 274 }
Chris@82 275 {
Chris@82 276 E Tj, Tk, Tn, To;
Chris@82 277 Tj = R0[WS(rs, 10)];
Chris@82 278 Tk = R0[WS(rs, 26)];
Chris@82 279 Tl = Tj + Tk;
Chris@82 280 T3F = Tj - Tk;
Chris@82 281 Tn = R0[WS(rs, 30)];
Chris@82 282 To = R0[WS(rs, 14)];
Chris@82 283 Tp = Tn + To;
Chris@82 284 T3H = Tn - To;
Chris@82 285 }
Chris@82 286 Tm = Ti + Tl;
Chris@82 287 Tt = Tp + Ts;
Chris@82 288 Tu = Tm + Tt;
Chris@82 289 T4Q = FMA(KP414213562, T3E, T3F);
Chris@82 290 T4R = FNMS(KP414213562, T3H, T3I);
Chris@82 291 T4S = T4Q + T4R;
Chris@82 292 T5q = T4Q - T4R;
Chris@82 293 {
Chris@82 294 E T12, T13, T3G, T3J;
Chris@82 295 T12 = Ti - Tl;
Chris@82 296 T13 = Tp - Ts;
Chris@82 297 T14 = T12 + T13;
Chris@82 298 T2k = T13 - T12;
Chris@82 299 T3G = FNMS(KP414213562, T3F, T3E);
Chris@82 300 T3J = FMA(KP414213562, T3I, T3H);
Chris@82 301 T3K = T3G + T3J;
Chris@82 302 T5Q = T3J - T3G;
Chris@82 303 }
Chris@82 304 }
/* Input block: R1[WS(rs, k)] for k = 2, 18, 6, 22, 10, 26, 30, 14. */
Chris@82 305 {
Chris@82 306 E T1n, T4b, T1x, T4f, T1q, T4c, T1u, T4e;
Chris@82 307 {
Chris@82 308 E T1l, T1m, T1v, T1w;
Chris@82 309 T1l = R1[WS(rs, 2)];
Chris@82 310 T1m = R1[WS(rs, 18)];
Chris@82 311 T1n = T1l + T1m;
Chris@82 312 T4b = T1l - T1m;
Chris@82 313 T1v = R1[WS(rs, 6)];
Chris@82 314 T1w = R1[WS(rs, 22)];
Chris@82 315 T1x = T1v + T1w;
Chris@82 316 T4f = T1v - T1w;
Chris@82 317 }
Chris@82 318 {
Chris@82 319 E T1o, T1p, T1s, T1t;
Chris@82 320 T1o = R1[WS(rs, 10)];
Chris@82 321 T1p = R1[WS(rs, 26)];
Chris@82 322 T1q = T1o + T1p;
Chris@82 323 T4c = T1o - T1p;
Chris@82 324 T1s = R1[WS(rs, 30)];
Chris@82 325 T1t = R1[WS(rs, 14)];
Chris@82 326 T1u = T1s + T1t;
Chris@82 327 T4e = T1s - T1t;
Chris@82 328 }
Chris@82 329 {
Chris@82 330 E T1r, T1y, T4m, T4n;
Chris@82 331 T1r = T1n - T1q;
Chris@82 332 T1y = T1u - T1x;
Chris@82 333 T1z = T1r + T1y;
Chris@82 334 T1I = T1y - T1r;
Chris@82 335 T4m = FMA(KP414213562, T4b, T4c);
Chris@82 336 T4n = FNMS(KP414213562, T4e, T4f);
Chris@82 337 T4o = T4m + T4n;
Chris@82 338 T5B = T4m - T4n;
Chris@82 339 }
Chris@82 340 {
Chris@82 341 E T2V, T2W, T4d, T4g;
Chris@82 342 T2V = T1n + T1q;
Chris@82 343 T2W = T1u + T1x;
Chris@82 344 T2X = T2V + T2W;
Chris@82 345 T3j = T2W - T2V;
Chris@82 346 T4d = FNMS(KP414213562, T4c, T4b);
Chris@82 347 T4g = FMA(KP414213562, T4f, T4e);
Chris@82 348 T4h = T4d + T4g;
Chris@82 349 T5E = T4g - T4d;
Chris@82 350 }
Chris@82 351 }
/* Input block: R1[WS(rs, k)] for k = 1, 17, 5, 21, 9, 25, 29, 13. */
Chris@82 352 {
Chris@82 353 E T1U, T4w, T24, T4A, T1X, T4x, T21, T4z;
Chris@82 354 {
Chris@82 355 E T1S, T1T, T22, T23;
Chris@82 356 T1S = R1[WS(rs, 1)];
Chris@82 357 T1T = R1[WS(rs, 17)];
Chris@82 358 T1U = T1S + T1T;
Chris@82 359 T4w = T1S - T1T;
Chris@82 360 T22 = R1[WS(rs, 5)];
Chris@82 361 T23 = R1[WS(rs, 21)];
Chris@82 362 T24 = T22 + T23;
Chris@82 363 T4A = T23 - T22;
Chris@82 364 }
Chris@82 365 {
Chris@82 366 E T1V, T1W, T1Z, T20;
Chris@82 367 T1V = R1[WS(rs, 9)];
Chris@82 368 T1W = R1[WS(rs, 25)];
Chris@82 369 T1X = T1V + T1W;
Chris@82 370 T4x = T1W - T1V;
Chris@82 371 T1Z = R1[WS(rs, 29)];
Chris@82 372 T20 = R1[WS(rs, 13)];
Chris@82 373 T21 = T1Z + T20;
Chris@82 374 T4z = T1Z - T20;
Chris@82 375 }
Chris@82 376 {
Chris@82 377 E T1Y, T25, T4H, T4I;
Chris@82 378 T1Y = T1U - T1X;
Chris@82 379 T25 = T21 - T24;
Chris@82 380 T26 = T1Y + T25;
Chris@82 381 T2f = T25 - T1Y;
Chris@82 382 T4H = FNMS(KP414213562, T4w, T4x);
Chris@82 383 T4I = FMA(KP414213562, T4z, T4A);
Chris@82 384 T4J = T4H + T4I;
Chris@82 385 T5I = T4I - T4H;
Chris@82 386 }
Chris@82 387 {
Chris@82 388 E T32, T33, T4y, T4B;
Chris@82 389 T32 = T1U + T1X;
Chris@82 390 T33 = T21 + T24;
Chris@82 391 T34 = T32 + T33;
Chris@82 392 T3m = T33 - T32;
Chris@82 393 T4y = FMA(KP414213562, T4x, T4w);
Chris@82 394 T4B = FNMS(KP414213562, T4A, T4z);
Chris@82 395 T4C = T4y + T4B;
Chris@82 396 T5L = T4B - T4y;
Chris@82 397 }
Chris@82 398 }
/* Output block: Cr at 16, 32 and 0; Ci at 16. */
Chris@82 399 {
Chris@82 400 E Tv, T10, T39, T3a, T3b, T3c;
Chris@82 401 Tv = Tf + Tu;
Chris@82 402 T10 = TK + TZ;
Chris@82 403 T39 = Tv + T10;
Chris@82 404 T3a = T2U + T2X;
Chris@82 405 T3b = T31 + T34;
Chris@82 406 T3c = T3a + T3b;
Chris@82 407 Cr[WS(csr, 16)] = Tv - T10;
Chris@82 408 Ci[WS(csi, 16)] = T3b - T3a;
Chris@82 409 Cr[WS(csr, 32)] = T39 - T3c;
Chris@82 410 Cr[0] = T39 + T3c;
Chris@82 411 }
/* Output block: Cr and Ci at 24 and 8. */
Chris@82 412 {
Chris@82 413 E T2R, T37, T36, T38, T2Y, T35;
Chris@82 414 T2R = Tf - Tu;
Chris@82 415 T37 = TZ - TK;
Chris@82 416 T2Y = T2U - T2X;
Chris@82 417 T35 = T31 - T34;
Chris@82 418 T36 = T2Y + T35;
Chris@82 419 T38 = T35 - T2Y;
Chris@82 420 Cr[WS(csr, 24)] = FNMS(KP707106781, T36, T2R);
Chris@82 421 Ci[WS(csi, 24)] = FMS(KP707106781, T38, T37);
Chris@82 422 Cr[WS(csr, 8)] = FMA(KP707106781, T36, T2R);
Chris@82 423 Ci[WS(csi, 8)] = FMA(KP707106781, T38, T37);
Chris@82 424 }
/* Output block: Cr and Ci at 28, 4, 12 and 20. */
Chris@82 425 {
Chris@82 426 E T3h, T3x, T3w, T3y, T3o, T3s, T3r, T3t;
Chris@82 427 {
Chris@82 428 E T3d, T3g, T3u, T3v;
Chris@82 429 T3d = T7 - Te;
Chris@82 430 T3g = T3e + T3f;
Chris@82 431 T3h = FMA(KP707106781, T3g, T3d);
Chris@82 432 T3x = FNMS(KP707106781, T3g, T3d);
Chris@82 433 T3u = FNMS(KP414213562, T3i, T3j);
Chris@82 434 T3v = FMA(KP414213562, T3l, T3m);
Chris@82 435 T3w = T3u + T3v;
Chris@82 436 T3y = T3v - T3u;
Chris@82 437 }
Chris@82 438 {
Chris@82 439 E T3k, T3n, T3p, T3q;
Chris@82 440 T3k = FMA(KP414213562, T3j, T3i);
Chris@82 441 T3n = FNMS(KP414213562, T3m, T3l);
Chris@82 442 T3o = T3k + T3n;
Chris@82 443 T3s = T3n - T3k;
Chris@82 444 T3p = Tt - Tm;
Chris@82 445 T3q = T3f - T3e;
Chris@82 446 T3r = FNMS(KP707106781, T3q, T3p);
Chris@82 447 T3t = FMA(KP707106781, T3q, T3p);
Chris@82 448 }
Chris@82 449 Cr[WS(csr, 28)] = FNMS(KP923879532, T3o, T3h);
Chris@82 450 Ci[WS(csi, 28)] = FMS(KP923879532, T3w, T3t);
Chris@82 451 Cr[WS(csr, 4)] = FMA(KP923879532, T3o, T3h);
Chris@82 452 Ci[WS(csi, 4)] = FMA(KP923879532, T3w, T3t);
Chris@82 453 Ci[WS(csi, 12)] = FMS(KP923879532, T3s, T3r);
Chris@82 454 Cr[WS(csr, 12)] = FMA(KP923879532, T3y, T3x);
Chris@82 455 Ci[WS(csi, 20)] = FMA(KP923879532, T3s, T3r);
Chris@82 456 Cr[WS(csr, 20)] = FNMS(KP923879532, T3y, T3x);
Chris@82 457 }
/* Output block: Cr and Ci at 26, 6, 10 and 22. */
Chris@82 458 {
Chris@82 459 E T2z, T2P, T2J, T2L, T2C, T2M, T2F, T2N;
Chris@82 460 {
Chris@82 461 E T2x, T2y, T2H, T2I;
Chris@82 462 T2x = FNMS(KP707106781, T14, T11);
Chris@82 463 T2y = T2n - T2m;
Chris@82 464 T2z = FMA(KP923879532, T2y, T2x);
Chris@82 465 T2P = FNMS(KP923879532, T2y, T2x);
Chris@82 466 T2H = FNMS(KP707106781, T2k, T2j);
Chris@82 467 T2I = T1b - T18;
Chris@82 468 T2J = FMA(KP923879532, T2I, T2H);
Chris@82 469 T2L = FNMS(KP923879532, T2I, T2H);
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E T2A, T2B, T2D, T2E;
Chris@82 473 T2A = FNMS(KP707106781, T1z, T1k);
Chris@82 474 T2B = FNMS(KP707106781, T1I, T1H);
Chris@82 475 T2C = FNMS(KP668178637, T2B, T2A);
Chris@82 476 T2M = FMA(KP668178637, T2A, T2B);
Chris@82 477 T2D = FNMS(KP707106781, T26, T1R);
Chris@82 478 T2E = FNMS(KP707106781, T2f, T2e);
Chris@82 479 T2F = FMA(KP668178637, T2E, T2D);
Chris@82 480 T2N = FNMS(KP668178637, T2D, T2E);
Chris@82 481 }
Chris@82 482 {
Chris@82 483 E T2G, T2O, T2K, T2Q;
Chris@82 484 T2G = T2C + T2F;
Chris@82 485 Cr[WS(csr, 26)] = FNMS(KP831469612, T2G, T2z);
Chris@82 486 Cr[WS(csr, 6)] = FMA(KP831469612, T2G, T2z);
Chris@82 487 T2O = T2M + T2N;
Chris@82 488 Ci[WS(csi, 6)] = -(FMA(KP831469612, T2O, T2L));
Chris@82 489 Ci[WS(csi, 26)] = FNMS(KP831469612, T2O, T2L);
Chris@82 490 T2K = T2F - T2C;
Chris@82 491 Ci[WS(csi, 10)] = FMA(KP831469612, T2K, T2J);
Chris@82 492 Ci[WS(csi, 22)] = FMS(KP831469612, T2K, T2J);
Chris@82 493 T2Q = T2M - T2N;
Chris@82 494 Cr[WS(csr, 22)] = FNMS(KP831469612, T2Q, T2P);
Chris@82 495 Cr[WS(csr, 10)] = FMA(KP831469612, T2Q, T2P);
Chris@82 496 }
Chris@82 497 }
/* Output block: Cr and Ci at 30, 2, 14 and 18. */
Chris@82 498 {
Chris@82 499 E T1d, T2v, T2p, T2r, T1K, T2s, T2h, T2t;
Chris@82 500 {
Chris@82 501 E T15, T1c, T2l, T2o;
Chris@82 502 T15 = FMA(KP707106781, T14, T11);
Chris@82 503 T1c = T18 + T1b;
Chris@82 504 T1d = FMA(KP923879532, T1c, T15);
Chris@82 505 T2v = FNMS(KP923879532, T1c, T15);
Chris@82 506 T2l = FMA(KP707106781, T2k, T2j);
Chris@82 507 T2o = T2m + T2n;
Chris@82 508 T2p = FNMS(KP923879532, T2o, T2l);
Chris@82 509 T2r = FMA(KP923879532, T2o, T2l);
Chris@82 510 }
Chris@82 511 {
Chris@82 512 E T1A, T1J, T27, T2g;
Chris@82 513 T1A = FMA(KP707106781, T1z, T1k);
Chris@82 514 T1J = FMA(KP707106781, T1I, T1H);
Chris@82 515 T1K = FMA(KP198912367, T1J, T1A);
Chris@82 516 T2s = FNMS(KP198912367, T1A, T1J);
Chris@82 517 T27 = FMA(KP707106781, T26, T1R);
Chris@82 518 T2g = FMA(KP707106781, T2f, T2e);
Chris@82 519 T2h = FNMS(KP198912367, T2g, T27);
Chris@82 520 T2t = FMA(KP198912367, T27, T2g);
Chris@82 521 }
Chris@82 522 {
Chris@82 523 E T2i, T2u, T2q, T2w;
Chris@82 524 T2i = T1K + T2h;
Chris@82 525 Cr[WS(csr, 30)] = FNMS(KP980785280, T2i, T1d);
Chris@82 526 Cr[WS(csr, 2)] = FMA(KP980785280, T2i, T1d);
Chris@82 527 T2u = T2s + T2t;
Chris@82 528 Ci[WS(csi, 2)] = FMA(KP980785280, T2u, T2r);
Chris@82 529 Ci[WS(csi, 30)] = FMS(KP980785280, T2u, T2r);
Chris@82 530 T2q = T2h - T1K;
Chris@82 531 Ci[WS(csi, 14)] = FMS(KP980785280, T2q, T2p);
Chris@82 532 Ci[WS(csi, 18)] = FMA(KP980785280, T2q, T2p);
Chris@82 533 T2w = T2t - T2s;
Chris@82 534 Cr[WS(csr, 18)] = FNMS(KP980785280, T2w, T2v);
Chris@82 535 Cr[WS(csr, 14)] = FMA(KP980785280, T2w, T2v);
Chris@82 536 }
Chris@82 537 }
/* Output block: Cr and Ci at 27, 5, 11, 21, 29, 3, 13 and 19. */
Chris@82 538 {
Chris@82 539 E T5r, T63, T6d, T5R, T5y, T6e, T6b, T6j, T5U, T64, T5G, T5Z, T68, T6i, T5N;
Chris@82 540 E T5Y;
Chris@82 541 {
Chris@82 542 E T5u, T5x, T5C, T5F;
Chris@82 543 T5r = FNMS(KP923879532, T5q, T5p);
Chris@82 544 T63 = FMA(KP923879532, T5q, T5p);
Chris@82 545 T6d = FMA(KP923879532, T5Q, T5P);
Chris@82 546 T5R = FNMS(KP923879532, T5Q, T5P);
Chris@82 547 T5u = FMA(KP668178637, T5t, T5s);
Chris@82 548 T5x = FNMS(KP668178637, T5w, T5v);
Chris@82 549 T5y = T5u - T5x;
Chris@82 550 T6e = T5x + T5u;
Chris@82 551 {
Chris@82 552 E T69, T6a, T5S, T5T;
Chris@82 553 T69 = FMA(KP923879532, T5I, T5H);
Chris@82 554 T6a = FNMS(KP923879532, T5L, T5K);
Chris@82 555 T6b = FMA(KP303346683, T6a, T69);
Chris@82 556 T6j = FNMS(KP303346683, T69, T6a);
Chris@82 557 T5S = FNMS(KP668178637, T5s, T5t);
Chris@82 558 T5T = FMA(KP668178637, T5v, T5w);
Chris@82 559 T5U = T5S - T5T;
Chris@82 560 T64 = T5T + T5S;
Chris@82 561 }
Chris@82 562 T5C = FNMS(KP923879532, T5B, T5A);
Chris@82 563 T5F = FNMS(KP923879532, T5E, T5D);
Chris@82 564 T5G = FNMS(KP534511135, T5F, T5C);
Chris@82 565 T5Z = FMA(KP534511135, T5C, T5F);
Chris@82 566 {
Chris@82 567 E T66, T67, T5J, T5M;
Chris@82 568 T66 = FMA(KP923879532, T5B, T5A);
Chris@82 569 T67 = FMA(KP923879532, T5E, T5D);
Chris@82 570 T68 = FMA(KP303346683, T67, T66);
Chris@82 571 T6i = FNMS(KP303346683, T66, T67);
Chris@82 572 T5J = FNMS(KP923879532, T5I, T5H);
Chris@82 573 T5M = FMA(KP923879532, T5L, T5K);
Chris@82 574 T5N = FNMS(KP534511135, T5M, T5J);
Chris@82 575 T5Y = FMA(KP534511135, T5J, T5M);
Chris@82 576 }
Chris@82 577 }
Chris@82 578 {
Chris@82 579 E T5z, T5O, T5X, T60;
Chris@82 580 T5z = FMA(KP831469612, T5y, T5r);
Chris@82 581 T5O = T5G + T5N;
Chris@82 582 Cr[WS(csr, 27)] = FNMS(KP881921264, T5O, T5z);
Chris@82 583 Cr[WS(csr, 5)] = FMA(KP881921264, T5O, T5z);
Chris@82 584 T5X = FNMS(KP831469612, T5U, T5R);
Chris@82 585 T60 = T5Y - T5Z;
Chris@82 586 Ci[WS(csi, 5)] = FMS(KP881921264, T60, T5X);
Chris@82 587 Ci[WS(csi, 27)] = FMA(KP881921264, T60, T5X);
Chris@82 588 }
Chris@82 589 {
Chris@82 590 E T5V, T5W, T61, T62;
Chris@82 591 T5V = FMA(KP831469612, T5U, T5R);
Chris@82 592 T5W = T5N - T5G;
Chris@82 593 Ci[WS(csi, 11)] = FMA(KP881921264, T5W, T5V);
Chris@82 594 Ci[WS(csi, 21)] = FMS(KP881921264, T5W, T5V);
Chris@82 595 T61 = FNMS(KP831469612, T5y, T5r);
Chris@82 596 T62 = T5Z + T5Y;
Chris@82 597 Cr[WS(csr, 21)] = FNMS(KP881921264, T62, T61);
Chris@82 598 Cr[WS(csr, 11)] = FMA(KP881921264, T62, T61);
Chris@82 599 }
Chris@82 600 {
Chris@82 601 E T65, T6c, T6h, T6k;
Chris@82 602 T65 = FMA(KP831469612, T64, T63);
Chris@82 603 T6c = T68 + T6b;
Chris@82 604 Cr[WS(csr, 29)] = FNMS(KP956940335, T6c, T65);
Chris@82 605 Cr[WS(csr, 3)] = FMA(KP956940335, T6c, T65);
Chris@82 606 T6h = FMA(KP831469612, T6e, T6d);
Chris@82 607 T6k = T6i - T6j;
Chris@82 608 Ci[WS(csi, 3)] = FMA(KP956940335, T6k, T6h);
Chris@82 609 Ci[WS(csi, 29)] = FMS(KP956940335, T6k, T6h);
Chris@82 610 }
Chris@82 611 {
Chris@82 612 E T6f, T6g, T6l, T6m;
Chris@82 613 T6f = FNMS(KP831469612, T6e, T6d);
Chris@82 614 T6g = T6b - T68;
Chris@82 615 Ci[WS(csi, 13)] = FMS(KP956940335, T6g, T6f);
Chris@82 616 Ci[WS(csi, 19)] = FMA(KP956940335, T6g, T6f);
Chris@82 617 T6l = FNMS(KP831469612, T64, T63);
Chris@82 618 T6m = T6i + T6j;
Chris@82 619 Cr[WS(csr, 19)] = FMA(KP956940335, T6m, T6l);
Chris@82 620 Cr[WS(csr, 13)] = FNMS(KP956940335, T6m, T6l);
Chris@82 621 }
Chris@82 622 }
/* Output block: Cr and Ci at 31, 1, 15, 17, 25, 7, 9 and 23. */
Chris@82 623 {
Chris@82 624 E T3L, T55, T5f, T4T, T44, T5g, T5d, T5l, T4W, T56, T4q, T51, T5a, T5k, T4L;
Chris@82 625 E T50;
Chris@82 626 {
Chris@82 627 E T3U, T43, T4i, T4p;
Chris@82 628 T3L = FMA(KP923879532, T3K, T3D);
Chris@82 629 T55 = FNMS(KP923879532, T3K, T3D);
Chris@82 630 T5f = FNMS(KP923879532, T4S, T4P);
Chris@82 631 T4T = FMA(KP923879532, T4S, T4P);
Chris@82 632 T3U = FNMS(KP198912367, T3T, T3Q);
Chris@82 633 T43 = FMA(KP198912367, T42, T3Z);
Chris@82 634 T44 = T3U + T43;
Chris@82 635 T5g = T43 - T3U;
Chris@82 636 {
Chris@82 637 E T5b, T5c, T4U, T4V;
Chris@82 638 T5b = FNMS(KP923879532, T4C, T4v);
Chris@82 639 T5c = FNMS(KP923879532, T4J, T4G);
Chris@82 640 T5d = FMA(KP820678790, T5c, T5b);
Chris@82 641 T5l = FNMS(KP820678790, T5b, T5c);
Chris@82 642 T4U = FMA(KP198912367, T3Q, T3T);
Chris@82 643 T4V = FNMS(KP198912367, T3Z, T42);
Chris@82 644 T4W = T4U + T4V;
Chris@82 645 T56 = T4U - T4V;
Chris@82 646 }
Chris@82 647 T4i = FMA(KP923879532, T4h, T4a);
Chris@82 648 T4p = FMA(KP923879532, T4o, T4l);
Chris@82 649 T4q = FNMS(KP098491403, T4p, T4i);
Chris@82 650 T51 = FMA(KP098491403, T4i, T4p);
Chris@82 651 {
Chris@82 652 E T58, T59, T4D, T4K;
Chris@82 653 T58 = FNMS(KP923879532, T4h, T4a);
Chris@82 654 T59 = FNMS(KP923879532, T4o, T4l);
Chris@82 655 T5a = FMA(KP820678790, T59, T58);
Chris@82 656 T5k = FNMS(KP820678790, T58, T59);
Chris@82 657 T4D = FMA(KP923879532, T4C, T4v);
Chris@82 658 T4K = FMA(KP923879532, T4J, T4G);
Chris@82 659 T4L = FNMS(KP098491403, T4K, T4D);
Chris@82 660 T50 = FMA(KP098491403, T4D, T4K);
Chris@82 661 }
Chris@82 662 }
Chris@82 663 {
Chris@82 664 E T45, T4M, T4Z, T52;
Chris@82 665 T45 = FMA(KP980785280, T44, T3L);
Chris@82 666 T4M = T4q + T4L;
Chris@82 667 Cr[WS(csr, 31)] = FNMS(KP995184726, T4M, T45);
Chris@82 668 Cr[WS(csr, 1)] = FMA(KP995184726, T4M, T45);
Chris@82 669 T4Z = FMA(KP980785280, T4W, T4T);
Chris@82 670 T52 = T50 - T51;
Chris@82 671 Ci[WS(csi, 1)] = FMS(KP995184726, T52, T4Z);
Chris@82 672 Ci[WS(csi, 31)] = FMA(KP995184726, T52, T4Z);
Chris@82 673 }
Chris@82 674 {
Chris@82 675 E T4X, T4Y, T53, T54;
Chris@82 676 T4X = FNMS(KP980785280, T4W, T4T);
Chris@82 677 T4Y = T4L - T4q;
Chris@82 678 Ci[WS(csi, 15)] = FMA(KP995184726, T4Y, T4X);
Chris@82 679 Ci[WS(csi, 17)] = FMS(KP995184726, T4Y, T4X);
Chris@82 680 T53 = FNMS(KP980785280, T44, T3L);
Chris@82 681 T54 = T51 + T50;
Chris@82 682 Cr[WS(csr, 17)] = FNMS(KP995184726, T54, T53);
Chris@82 683 Cr[WS(csr, 15)] = FMA(KP995184726, T54, T53);
Chris@82 684 }
Chris@82 685 {
Chris@82 686 E T57, T5e, T5j, T5m;
Chris@82 687 T57 = FMA(KP980785280, T56, T55);
Chris@82 688 T5e = T5a + T5d;
Chris@82 689 Cr[WS(csr, 25)] = FNMS(KP773010453, T5e, T57);
Chris@82 690 Cr[WS(csr, 7)] = FMA(KP773010453, T5e, T57);
Chris@82 691 T5j = FMA(KP980785280, T5g, T5f);
Chris@82 692 T5m = T5k - T5l;
Chris@82 693 Ci[WS(csi, 7)] = FMA(KP773010453, T5m, T5j);
Chris@82 694 Ci[WS(csi, 25)] = FMS(KP773010453, T5m, T5j);
Chris@82 695 }
Chris@82 696 {
Chris@82 697 E T5h, T5i, T5n, T5o;
Chris@82 698 T5h = FNMS(KP980785280, T5g, T5f);
Chris@82 699 T5i = T5d - T5a;
Chris@82 700 Ci[WS(csi, 9)] = FMS(KP773010453, T5i, T5h);
Chris@82 701 Ci[WS(csi, 23)] = FMA(KP773010453, T5i, T5h);
Chris@82 702 T5n = FNMS(KP980785280, T56, T55);
Chris@82 703 T5o = T5k + T5l;
Chris@82 704 Cr[WS(csr, 23)] = FMA(KP773010453, T5o, T5n);
Chris@82 705 Cr[WS(csr, 9)] = FNMS(KP773010453, T5o, T5n);
Chris@82 706 }
Chris@82 707 }
Chris@82 708 }
Chris@82 709 }
Chris@82 710 }
Chris@82 711
/*
 * Descriptor passed to the planner together with r2cf_64 (FMA variant).
 * Fields as written: 64, the name string "r2cf_64", a four-entry count
 * tuple, and the address of GENUS (defined outside this file).
 * The first three counts (198, 0, 196) equal the "198 additions,
 * 0 multiplications, 196 fused multiply/add" figures quoted in the
 * generator's operation-count comment for this variant.
 * NOTE(review): 64 is presumably the transform size and the fourth count
 * some "other ops" tally -- confirm against the kr2c_desc declaration.
 */
Chris@82 712 static const kr2c_desc desc = { 64, "r2cf_64", {198, 0, 196, 0}, &GENUS };
Chris@82 713
/*
 * Registration entry point for the FMA variant: passes the planner p, the
 * r2cf_64 kernel and the address of its descriptor desc to
 * X(kr2c_register).  X() is a project macro defined outside this file; it
 * wraps both this function's name and the callee's name.
 */
Chris@82 714 void X(codelet_r2cf_64) (planner *p) {
Chris@82 715 X(kr2c_register) (p, r2cf_64, &desc);
Chris@82 716 }
Chris@82 717
Chris@82 718 #else
Chris@82 719
Chris@82 720 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include rdft/scalar/r2cf.h */
Chris@82 721
Chris@82 722 /*
Chris@82 723 * This function contains 394 FP additions, 124 FP multiplications,
Chris@82 724 * (or, 342 additions, 72 multiplications, 52 fused multiply/add),
Chris@82 725 * 106 stack variables, 15 constants, and 128 memory accesses
Chris@82 726 */
Chris@82 727 #include "rdft/scalar/r2cf.h"
Chris@82 728
Chris@82 729 static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 730 {
Chris@82 731 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@82 732 DK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@82 733 DK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@82 734 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@82 735 DK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@82 736 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@82 737 DK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@82 738 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@82 739 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 740 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 741 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 742 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 743 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 744 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 745 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 746 {
Chris@82 747 INT i;
Chris@82 748 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
Chris@82 749 E T4l, T5a, T15, T3n, T2T, T3Q, T7, Te, Tf, T4A, T4L, T1X, T3B, T23, T3y;
Chris@82 750 E T5I, T66, T4R, T52, T2j, T3F, T2H, T3I, T5P, T69, T1i, T3t, T1l, T3u, TZ;
Chris@82 751 E T63, T4v, T58, T1r, T3r, T1u, T3q, TK, T62, T4s, T57, Tm, Tt, Tu, T4o;
Chris@82 752 E T5b, T1c, T3R, T2Q, T3o, T1M, T3z, T5L, T67, T26, T3C, T4H, T4M, T2y, T3J;
Chris@82 753 E T5S, T6a, T2C, T3G, T4Y, T53;
Chris@82 754 {
Chris@82 755 E T3, T11, Td, T13, T6, T2S, Ta, T12, T14, T2R;
Chris@82 756 {
Chris@82 757 E T1, T2, Tb, Tc;
Chris@82 758 T1 = R0[0];
Chris@82 759 T2 = R0[WS(rs, 16)];
Chris@82 760 T3 = T1 + T2;
Chris@82 761 T11 = T1 - T2;
Chris@82 762 Tb = R0[WS(rs, 28)];
Chris@82 763 Tc = R0[WS(rs, 12)];
Chris@82 764 Td = Tb + Tc;
Chris@82 765 T13 = Tb - Tc;
Chris@82 766 }
Chris@82 767 {
Chris@82 768 E T4, T5, T8, T9;
Chris@82 769 T4 = R0[WS(rs, 8)];
Chris@82 770 T5 = R0[WS(rs, 24)];
Chris@82 771 T6 = T4 + T5;
Chris@82 772 T2S = T4 - T5;
Chris@82 773 T8 = R0[WS(rs, 4)];
Chris@82 774 T9 = R0[WS(rs, 20)];
Chris@82 775 Ta = T8 + T9;
Chris@82 776 T12 = T8 - T9;
Chris@82 777 }
Chris@82 778 T4l = T3 - T6;
Chris@82 779 T5a = Td - Ta;
Chris@82 780 T14 = KP707106781 * (T12 + T13);
Chris@82 781 T15 = T11 + T14;
Chris@82 782 T3n = T11 - T14;
Chris@82 783 T2R = KP707106781 * (T13 - T12);
Chris@82 784 T2T = T2R - T2S;
Chris@82 785 T3Q = T2S + T2R;
Chris@82 786 T7 = T3 + T6;
Chris@82 787 Te = Ta + Td;
Chris@82 788 Tf = T7 + Te;
Chris@82 789 }
Chris@82 790 {
Chris@82 791 E T1P, T4J, T21, T4y, T1S, T4K, T1W, T4z;
Chris@82 792 {
Chris@82 793 E T1N, T1O, T1Z, T20;
Chris@82 794 T1N = R1[WS(rs, 28)];
Chris@82 795 T1O = R1[WS(rs, 12)];
Chris@82 796 T1P = T1N - T1O;
Chris@82 797 T4J = T1N + T1O;
Chris@82 798 T1Z = R1[0];
Chris@82 799 T20 = R1[WS(rs, 16)];
Chris@82 800 T21 = T1Z - T20;
Chris@82 801 T4y = T1Z + T20;
Chris@82 802 }
Chris@82 803 {
Chris@82 804 E T1Q, T1R, T1U, T1V;
Chris@82 805 T1Q = R1[WS(rs, 4)];
Chris@82 806 T1R = R1[WS(rs, 20)];
Chris@82 807 T1S = T1Q - T1R;
Chris@82 808 T4K = T1Q + T1R;
Chris@82 809 T1U = R1[WS(rs, 8)];
Chris@82 810 T1V = R1[WS(rs, 24)];
Chris@82 811 T1W = T1U - T1V;
Chris@82 812 T4z = T1U + T1V;
Chris@82 813 }
Chris@82 814 T4A = T4y - T4z;
Chris@82 815 T4L = T4J - T4K;
Chris@82 816 {
Chris@82 817 E T1T, T22, T5G, T5H;
Chris@82 818 T1T = KP707106781 * (T1P - T1S);
Chris@82 819 T1X = T1T - T1W;
Chris@82 820 T3B = T1W + T1T;
Chris@82 821 T22 = KP707106781 * (T1S + T1P);
Chris@82 822 T23 = T21 + T22;
Chris@82 823 T3y = T21 - T22;
Chris@82 824 T5G = T4y + T4z;
Chris@82 825 T5H = T4K + T4J;
Chris@82 826 T5I = T5G + T5H;
Chris@82 827 T66 = T5G - T5H;
Chris@82 828 }
Chris@82 829 }
Chris@82 830 {
Chris@82 831 E T2b, T4P, T2G, T4Q, T2e, T51, T2h, T50;
Chris@82 832 {
Chris@82 833 E T29, T2a, T2E, T2F;
Chris@82 834 T29 = R1[WS(rs, 31)];
Chris@82 835 T2a = R1[WS(rs, 15)];
Chris@82 836 T2b = T29 - T2a;
Chris@82 837 T4P = T29 + T2a;
Chris@82 838 T2E = R1[WS(rs, 7)];
Chris@82 839 T2F = R1[WS(rs, 23)];
Chris@82 840 T2G = T2E - T2F;
Chris@82 841 T4Q = T2E + T2F;
Chris@82 842 }
Chris@82 843 {
Chris@82 844 E T2c, T2d, T2f, T2g;
Chris@82 845 T2c = R1[WS(rs, 3)];
Chris@82 846 T2d = R1[WS(rs, 19)];
Chris@82 847 T2e = T2c - T2d;
Chris@82 848 T51 = T2c + T2d;
Chris@82 849 T2f = R1[WS(rs, 27)];
Chris@82 850 T2g = R1[WS(rs, 11)];
Chris@82 851 T2h = T2f - T2g;
Chris@82 852 T50 = T2f + T2g;
Chris@82 853 }
Chris@82 854 T4R = T4P - T4Q;
Chris@82 855 T52 = T50 - T51;
Chris@82 856 {
Chris@82 857 E T2i, T2D, T5N, T5O;
Chris@82 858 T2i = KP707106781 * (T2e + T2h);
Chris@82 859 T2j = T2b + T2i;
Chris@82 860 T3F = T2b - T2i;
Chris@82 861 T2D = KP707106781 * (T2h - T2e);
Chris@82 862 T2H = T2D - T2G;
Chris@82 863 T3I = T2G + T2D;
Chris@82 864 T5N = T4P + T4Q;
Chris@82 865 T5O = T51 + T50;
Chris@82 866 T5P = T5N + T5O;
Chris@82 867 T69 = T5N - T5O;
Chris@82 868 }
Chris@82 869 }
Chris@82 870 {
Chris@82 871 E TN, T1e, TX, T1g, TQ, T1k, TU, T1f, T1h, T1j;
Chris@82 872 {
Chris@82 873 E TL, TM, TV, TW;
Chris@82 874 TL = R0[WS(rs, 31)];
Chris@82 875 TM = R0[WS(rs, 15)];
Chris@82 876 TN = TL + TM;
Chris@82 877 T1e = TL - TM;
Chris@82 878 TV = R0[WS(rs, 27)];
Chris@82 879 TW = R0[WS(rs, 11)];
Chris@82 880 TX = TV + TW;
Chris@82 881 T1g = TV - TW;
Chris@82 882 }
Chris@82 883 {
Chris@82 884 E TO, TP, TS, TT;
Chris@82 885 TO = R0[WS(rs, 7)];
Chris@82 886 TP = R0[WS(rs, 23)];
Chris@82 887 TQ = TO + TP;
Chris@82 888 T1k = TO - TP;
Chris@82 889 TS = R0[WS(rs, 3)];
Chris@82 890 TT = R0[WS(rs, 19)];
Chris@82 891 TU = TS + TT;
Chris@82 892 T1f = TS - TT;
Chris@82 893 }
Chris@82 894 T1h = KP707106781 * (T1f + T1g);
Chris@82 895 T1i = T1e + T1h;
Chris@82 896 T3t = T1e - T1h;
Chris@82 897 T1j = KP707106781 * (T1g - T1f);
Chris@82 898 T1l = T1j - T1k;
Chris@82 899 T3u = T1k + T1j;
Chris@82 900 {
Chris@82 901 E TR, TY, T4t, T4u;
Chris@82 902 TR = TN + TQ;
Chris@82 903 TY = TU + TX;
Chris@82 904 TZ = TR + TY;
Chris@82 905 T63 = TR - TY;
Chris@82 906 T4t = TN - TQ;
Chris@82 907 T4u = TX - TU;
Chris@82 908 T4v = FNMS(KP382683432, T4u, KP923879532 * T4t);
Chris@82 909 T58 = FMA(KP382683432, T4t, KP923879532 * T4u);
Chris@82 910 }
Chris@82 911 }
Chris@82 912 {
Chris@82 913 E Ty, T1s, TI, T1n, TB, T1q, TF, T1o, T1p, T1t;
Chris@82 914 {
Chris@82 915 E Tw, Tx, TG, TH;
Chris@82 916 Tw = R0[WS(rs, 1)];
Chris@82 917 Tx = R0[WS(rs, 17)];
Chris@82 918 Ty = Tw + Tx;
Chris@82 919 T1s = Tw - Tx;
Chris@82 920 TG = R0[WS(rs, 29)];
Chris@82 921 TH = R0[WS(rs, 13)];
Chris@82 922 TI = TG + TH;
Chris@82 923 T1n = TG - TH;
Chris@82 924 }
Chris@82 925 {
Chris@82 926 E Tz, TA, TD, TE;
Chris@82 927 Tz = R0[WS(rs, 9)];
Chris@82 928 TA = R0[WS(rs, 25)];
Chris@82 929 TB = Tz + TA;
Chris@82 930 T1q = Tz - TA;
Chris@82 931 TD = R0[WS(rs, 5)];
Chris@82 932 TE = R0[WS(rs, 21)];
Chris@82 933 TF = TD + TE;
Chris@82 934 T1o = TD - TE;
Chris@82 935 }
Chris@82 936 T1p = KP707106781 * (T1n - T1o);
Chris@82 937 T1r = T1p - T1q;
Chris@82 938 T3r = T1q + T1p;
Chris@82 939 T1t = KP707106781 * (T1o + T1n);
Chris@82 940 T1u = T1s + T1t;
Chris@82 941 T3q = T1s - T1t;
Chris@82 942 {
Chris@82 943 E TC, TJ, T4q, T4r;
Chris@82 944 TC = Ty + TB;
Chris@82 945 TJ = TF + TI;
Chris@82 946 TK = TC + TJ;
Chris@82 947 T62 = TC - TJ;
Chris@82 948 T4q = Ty - TB;
Chris@82 949 T4r = TI - TF;
Chris@82 950 T4s = FMA(KP923879532, T4q, KP382683432 * T4r);
Chris@82 951 T57 = FNMS(KP382683432, T4q, KP923879532 * T4r);
Chris@82 952 }
Chris@82 953 }
Chris@82 954 {
Chris@82 955 E Ti, T16, Ts, T1a, Tl, T17, Tp, T19, T4m, T4n;
Chris@82 956 {
Chris@82 957 E Tg, Th, Tq, Tr;
Chris@82 958 Tg = R0[WS(rs, 2)];
Chris@82 959 Th = R0[WS(rs, 18)];
Chris@82 960 Ti = Tg + Th;
Chris@82 961 T16 = Tg - Th;
Chris@82 962 Tq = R0[WS(rs, 6)];
Chris@82 963 Tr = R0[WS(rs, 22)];
Chris@82 964 Ts = Tq + Tr;
Chris@82 965 T1a = Tq - Tr;
Chris@82 966 }
Chris@82 967 {
Chris@82 968 E Tj, Tk, Tn, To;
Chris@82 969 Tj = R0[WS(rs, 10)];
Chris@82 970 Tk = R0[WS(rs, 26)];
Chris@82 971 Tl = Tj + Tk;
Chris@82 972 T17 = Tj - Tk;
Chris@82 973 Tn = R0[WS(rs, 30)];
Chris@82 974 To = R0[WS(rs, 14)];
Chris@82 975 Tp = Tn + To;
Chris@82 976 T19 = Tn - To;
Chris@82 977 }
Chris@82 978 Tm = Ti + Tl;
Chris@82 979 Tt = Tp + Ts;
Chris@82 980 Tu = Tm + Tt;
Chris@82 981 T4m = Ti - Tl;
Chris@82 982 T4n = Tp - Ts;
Chris@82 983 T4o = KP707106781 * (T4m + T4n);
Chris@82 984 T5b = KP707106781 * (T4n - T4m);
Chris@82 985 {
Chris@82 986 E T18, T1b, T2O, T2P;
Chris@82 987 T18 = FNMS(KP382683432, T17, KP923879532 * T16);
Chris@82 988 T1b = FMA(KP923879532, T19, KP382683432 * T1a);
Chris@82 989 T1c = T18 + T1b;
Chris@82 990 T3R = T1b - T18;
Chris@82 991 T2O = FNMS(KP923879532, T1a, KP382683432 * T19);
Chris@82 992 T2P = FMA(KP382683432, T16, KP923879532 * T17);
Chris@82 993 T2Q = T2O - T2P;
Chris@82 994 T3o = T2P + T2O;
Chris@82 995 }
Chris@82 996 }
Chris@82 997 {
Chris@82 998 E T1A, T4E, T1K, T4C, T1D, T4F, T1H, T4B;
Chris@82 999 {
Chris@82 1000 E T1y, T1z, T1I, T1J;
Chris@82 1001 T1y = R1[WS(rs, 30)];
Chris@82 1002 T1z = R1[WS(rs, 14)];
Chris@82 1003 T1A = T1y - T1z;
Chris@82 1004 T4E = T1y + T1z;
Chris@82 1005 T1I = R1[WS(rs, 10)];
Chris@82 1006 T1J = R1[WS(rs, 26)];
Chris@82 1007 T1K = T1I - T1J;
Chris@82 1008 T4C = T1I + T1J;
Chris@82 1009 }
Chris@82 1010 {
Chris@82 1011 E T1B, T1C, T1F, T1G;
Chris@82 1012 T1B = R1[WS(rs, 6)];
Chris@82 1013 T1C = R1[WS(rs, 22)];
Chris@82 1014 T1D = T1B - T1C;
Chris@82 1015 T4F = T1B + T1C;
Chris@82 1016 T1F = R1[WS(rs, 2)];
Chris@82 1017 T1G = R1[WS(rs, 18)];
Chris@82 1018 T1H = T1F - T1G;
Chris@82 1019 T4B = T1F + T1G;
Chris@82 1020 }
Chris@82 1021 {
Chris@82 1022 E T1E, T1L, T5J, T5K;
Chris@82 1023 T1E = FNMS(KP923879532, T1D, KP382683432 * T1A);
Chris@82 1024 T1L = FMA(KP382683432, T1H, KP923879532 * T1K);
Chris@82 1025 T1M = T1E - T1L;
Chris@82 1026 T3z = T1L + T1E;
Chris@82 1027 T5J = T4B + T4C;
Chris@82 1028 T5K = T4E + T4F;
Chris@82 1029 T5L = T5J + T5K;
Chris@82 1030 T67 = T5K - T5J;
Chris@82 1031 }
Chris@82 1032 {
Chris@82 1033 E T24, T25, T4D, T4G;
Chris@82 1034 T24 = FNMS(KP382683432, T1K, KP923879532 * T1H);
Chris@82 1035 T25 = FMA(KP923879532, T1A, KP382683432 * T1D);
Chris@82 1036 T26 = T24 + T25;
Chris@82 1037 T3C = T25 - T24;
Chris@82 1038 T4D = T4B - T4C;
Chris@82 1039 T4G = T4E - T4F;
Chris@82 1040 T4H = KP707106781 * (T4D + T4G);
Chris@82 1041 T4M = KP707106781 * (T4G - T4D);
Chris@82 1042 }
Chris@82 1043 }
Chris@82 1044 {
Chris@82 1045 E T2m, T4S, T2w, T4W, T2p, T4T, T2t, T4V;
Chris@82 1046 {
Chris@82 1047 E T2k, T2l, T2u, T2v;
Chris@82 1048 T2k = R1[WS(rs, 1)];
Chris@82 1049 T2l = R1[WS(rs, 17)];
Chris@82 1050 T2m = T2k - T2l;
Chris@82 1051 T4S = T2k + T2l;
Chris@82 1052 T2u = R1[WS(rs, 5)];
Chris@82 1053 T2v = R1[WS(rs, 21)];
Chris@82 1054 T2w = T2u - T2v;
Chris@82 1055 T4W = T2u + T2v;
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T2n, T2o, T2r, T2s;
Chris@82 1059 T2n = R1[WS(rs, 9)];
Chris@82 1060 T2o = R1[WS(rs, 25)];
Chris@82 1061 T2p = T2n - T2o;
Chris@82 1062 T4T = T2n + T2o;
Chris@82 1063 T2r = R1[WS(rs, 29)];
Chris@82 1064 T2s = R1[WS(rs, 13)];
Chris@82 1065 T2t = T2r - T2s;
Chris@82 1066 T4V = T2r + T2s;
Chris@82 1067 }
Chris@82 1068 {
Chris@82 1069 E T2q, T2x, T5Q, T5R;
Chris@82 1070 T2q = FNMS(KP382683432, T2p, KP923879532 * T2m);
Chris@82 1071 T2x = FMA(KP923879532, T2t, KP382683432 * T2w);
Chris@82 1072 T2y = T2q + T2x;
Chris@82 1073 T3J = T2x - T2q;
Chris@82 1074 T5Q = T4S + T4T;
Chris@82 1075 T5R = T4V + T4W;
Chris@82 1076 T5S = T5Q + T5R;
Chris@82 1077 T6a = T5R - T5Q;
Chris@82 1078 }
Chris@82 1079 {
Chris@82 1080 E T2A, T2B, T4U, T4X;
Chris@82 1081 T2A = FNMS(KP923879532, T2w, KP382683432 * T2t);
Chris@82 1082 T2B = FMA(KP382683432, T2m, KP923879532 * T2p);
Chris@82 1083 T2C = T2A - T2B;
Chris@82 1084 T3G = T2B + T2A;
Chris@82 1085 T4U = T4S - T4T;
Chris@82 1086 T4X = T4V - T4W;
Chris@82 1087 T4Y = KP707106781 * (T4U + T4X);
Chris@82 1088 T53 = KP707106781 * (T4X - T4U);
Chris@82 1089 }
Chris@82 1090 }
Chris@82 1091 {
Chris@82 1092 E Tv, T10, T5X, T5Y, T5Z, T60;
Chris@82 1093 Tv = Tf + Tu;
Chris@82 1094 T10 = TK + TZ;
Chris@82 1095 T5X = Tv + T10;
Chris@82 1096 T5Y = T5I + T5L;
Chris@82 1097 T5Z = T5P + T5S;
Chris@82 1098 T60 = T5Y + T5Z;
Chris@82 1099 Cr[WS(csr, 16)] = Tv - T10;
Chris@82 1100 Ci[WS(csi, 16)] = T5Z - T5Y;
Chris@82 1101 Cr[WS(csr, 32)] = T5X - T60;
Chris@82 1102 Cr[0] = T5X + T60;
Chris@82 1103 }
Chris@82 1104 {
Chris@82 1105 E T5F, T5V, T5U, T5W, T5M, T5T;
Chris@82 1106 T5F = Tf - Tu;
Chris@82 1107 T5V = TZ - TK;
Chris@82 1108 T5M = T5I - T5L;
Chris@82 1109 T5T = T5P - T5S;
Chris@82 1110 T5U = KP707106781 * (T5M + T5T);
Chris@82 1111 T5W = KP707106781 * (T5T - T5M);
Chris@82 1112 Cr[WS(csr, 24)] = T5F - T5U;
Chris@82 1113 Ci[WS(csi, 24)] = T5W - T5V;
Chris@82 1114 Cr[WS(csr, 8)] = T5F + T5U;
Chris@82 1115 Ci[WS(csi, 8)] = T5V + T5W;
Chris@82 1116 }
Chris@82 1117 {
Chris@82 1118 E T65, T6l, T6k, T6m, T6c, T6g, T6f, T6h;
Chris@82 1119 {
Chris@82 1120 E T61, T64, T6i, T6j;
Chris@82 1121 T61 = T7 - Te;
Chris@82 1122 T64 = KP707106781 * (T62 + T63);
Chris@82 1123 T65 = T61 + T64;
Chris@82 1124 T6l = T61 - T64;
Chris@82 1125 T6i = FNMS(KP382683432, T66, KP923879532 * T67);
Chris@82 1126 T6j = FMA(KP382683432, T69, KP923879532 * T6a);
Chris@82 1127 T6k = T6i + T6j;
Chris@82 1128 T6m = T6j - T6i;
Chris@82 1129 }
Chris@82 1130 {
Chris@82 1131 E T68, T6b, T6d, T6e;
Chris@82 1132 T68 = FMA(KP923879532, T66, KP382683432 * T67);
Chris@82 1133 T6b = FNMS(KP382683432, T6a, KP923879532 * T69);
Chris@82 1134 T6c = T68 + T6b;
Chris@82 1135 T6g = T6b - T68;
Chris@82 1136 T6d = KP707106781 * (T63 - T62);
Chris@82 1137 T6e = Tt - Tm;
Chris@82 1138 T6f = T6d - T6e;
Chris@82 1139 T6h = T6e + T6d;
Chris@82 1140 }
Chris@82 1141 Cr[WS(csr, 28)] = T65 - T6c;
Chris@82 1142 Ci[WS(csi, 28)] = T6k - T6h;
Chris@82 1143 Cr[WS(csr, 4)] = T65 + T6c;
Chris@82 1144 Ci[WS(csi, 4)] = T6h + T6k;
Chris@82 1145 Ci[WS(csi, 12)] = T6f + T6g;
Chris@82 1146 Cr[WS(csr, 12)] = T6l + T6m;
Chris@82 1147 Ci[WS(csi, 20)] = T6g - T6f;
Chris@82 1148 Cr[WS(csr, 20)] = T6l - T6m;
Chris@82 1149 }
Chris@82 1150 {
Chris@82 1151 E T5n, T5D, T5x, T5z, T5q, T5A, T5t, T5B;
Chris@82 1152 {
Chris@82 1153 E T5l, T5m, T5v, T5w;
Chris@82 1154 T5l = T4l - T4o;
Chris@82 1155 T5m = T58 - T57;
Chris@82 1156 T5n = T5l + T5m;
Chris@82 1157 T5D = T5l - T5m;
Chris@82 1158 T5v = T4v - T4s;
Chris@82 1159 T5w = T5b - T5a;
Chris@82 1160 T5x = T5v - T5w;
Chris@82 1161 T5z = T5w + T5v;
Chris@82 1162 }
Chris@82 1163 {
Chris@82 1164 E T5o, T5p, T5r, T5s;
Chris@82 1165 T5o = T4A - T4H;
Chris@82 1166 T5p = T4M - T4L;
Chris@82 1167 T5q = FMA(KP831469612, T5o, KP555570233 * T5p);
Chris@82 1168 T5A = FNMS(KP555570233, T5o, KP831469612 * T5p);
Chris@82 1169 T5r = T4R - T4Y;
Chris@82 1170 T5s = T53 - T52;
Chris@82 1171 T5t = FNMS(KP555570233, T5s, KP831469612 * T5r);
Chris@82 1172 T5B = FMA(KP555570233, T5r, KP831469612 * T5s);
Chris@82 1173 }
Chris@82 1174 {
Chris@82 1175 E T5u, T5C, T5y, T5E;
Chris@82 1176 T5u = T5q + T5t;
Chris@82 1177 Cr[WS(csr, 26)] = T5n - T5u;
Chris@82 1178 Cr[WS(csr, 6)] = T5n + T5u;
Chris@82 1179 T5C = T5A + T5B;
Chris@82 1180 Ci[WS(csi, 6)] = T5z + T5C;
Chris@82 1181 Ci[WS(csi, 26)] = T5C - T5z;
Chris@82 1182 T5y = T5t - T5q;
Chris@82 1183 Ci[WS(csi, 10)] = T5x + T5y;
Chris@82 1184 Ci[WS(csi, 22)] = T5y - T5x;
Chris@82 1185 T5E = T5B - T5A;
Chris@82 1186 Cr[WS(csr, 22)] = T5D - T5E;
Chris@82 1187 Cr[WS(csr, 10)] = T5D + T5E;
Chris@82 1188 }
Chris@82 1189 }
Chris@82 1190 {
Chris@82 1191 E T4x, T5j, T5d, T5f, T4O, T5g, T55, T5h;
Chris@82 1192 {
Chris@82 1193 E T4p, T4w, T59, T5c;
Chris@82 1194 T4p = T4l + T4o;
Chris@82 1195 T4w = T4s + T4v;
Chris@82 1196 T4x = T4p + T4w;
Chris@82 1197 T5j = T4p - T4w;
Chris@82 1198 T59 = T57 + T58;
Chris@82 1199 T5c = T5a + T5b;
Chris@82 1200 T5d = T59 - T5c;
Chris@82 1201 T5f = T5c + T59;
Chris@82 1202 }
Chris@82 1203 {
Chris@82 1204 E T4I, T4N, T4Z, T54;
Chris@82 1205 T4I = T4A + T4H;
Chris@82 1206 T4N = T4L + T4M;
Chris@82 1207 T4O = FMA(KP980785280, T4I, KP195090322 * T4N);
Chris@82 1208 T5g = FNMS(KP195090322, T4I, KP980785280 * T4N);
Chris@82 1209 T4Z = T4R + T4Y;
Chris@82 1210 T54 = T52 + T53;
Chris@82 1211 T55 = FNMS(KP195090322, T54, KP980785280 * T4Z);
Chris@82 1212 T5h = FMA(KP195090322, T4Z, KP980785280 * T54);
Chris@82 1213 }
Chris@82 1214 {
Chris@82 1215 E T56, T5i, T5e, T5k;
Chris@82 1216 T56 = T4O + T55;
Chris@82 1217 Cr[WS(csr, 30)] = T4x - T56;
Chris@82 1218 Cr[WS(csr, 2)] = T4x + T56;
Chris@82 1219 T5i = T5g + T5h;
Chris@82 1220 Ci[WS(csi, 2)] = T5f + T5i;
Chris@82 1221 Ci[WS(csi, 30)] = T5i - T5f;
Chris@82 1222 T5e = T55 - T4O;
Chris@82 1223 Ci[WS(csi, 14)] = T5d + T5e;
Chris@82 1224 Ci[WS(csi, 18)] = T5e - T5d;
Chris@82 1225 T5k = T5h - T5g;
Chris@82 1226 Cr[WS(csr, 18)] = T5j - T5k;
Chris@82 1227 Cr[WS(csr, 14)] = T5j + T5k;
Chris@82 1228 }
Chris@82 1229 }
Chris@82 1230 {
Chris@82 1231 E T3p, T41, T4c, T3S, T3w, T4b, T49, T4h, T3P, T42, T3E, T3W, T46, T4g, T3L;
Chris@82 1232 E T3X;
Chris@82 1233 {
Chris@82 1234 E T3s, T3v, T3A, T3D;
Chris@82 1235 T3p = T3n + T3o;
Chris@82 1236 T41 = T3n - T3o;
Chris@82 1237 T4c = T3R - T3Q;
Chris@82 1238 T3S = T3Q + T3R;
Chris@82 1239 T3s = FMA(KP831469612, T3q, KP555570233 * T3r);
Chris@82 1240 T3v = FNMS(KP555570233, T3u, KP831469612 * T3t);
Chris@82 1241 T3w = T3s + T3v;
Chris@82 1242 T4b = T3v - T3s;
Chris@82 1243 {
Chris@82 1244 E T47, T48, T3N, T3O;
Chris@82 1245 T47 = T3F - T3G;
Chris@82 1246 T48 = T3J - T3I;
Chris@82 1247 T49 = FNMS(KP471396736, T48, KP881921264 * T47);
Chris@82 1248 T4h = FMA(KP471396736, T47, KP881921264 * T48);
Chris@82 1249 T3N = FNMS(KP555570233, T3q, KP831469612 * T3r);
Chris@82 1250 T3O = FMA(KP555570233, T3t, KP831469612 * T3u);
Chris@82 1251 T3P = T3N + T3O;
Chris@82 1252 T42 = T3O - T3N;
Chris@82 1253 }
Chris@82 1254 T3A = T3y + T3z;
Chris@82 1255 T3D = T3B + T3C;
Chris@82 1256 T3E = FMA(KP956940335, T3A, KP290284677 * T3D);
Chris@82 1257 T3W = FNMS(KP290284677, T3A, KP956940335 * T3D);
Chris@82 1258 {
Chris@82 1259 E T44, T45, T3H, T3K;
Chris@82 1260 T44 = T3y - T3z;
Chris@82 1261 T45 = T3C - T3B;
Chris@82 1262 T46 = FMA(KP881921264, T44, KP471396736 * T45);
Chris@82 1263 T4g = FNMS(KP471396736, T44, KP881921264 * T45);
Chris@82 1264 T3H = T3F + T3G;
Chris@82 1265 T3K = T3I + T3J;
Chris@82 1266 T3L = FNMS(KP290284677, T3K, KP956940335 * T3H);
Chris@82 1267 T3X = FMA(KP290284677, T3H, KP956940335 * T3K);
Chris@82 1268 }
Chris@82 1269 }
Chris@82 1270 {
Chris@82 1271 E T3x, T3M, T3V, T3Y;
Chris@82 1272 T3x = T3p + T3w;
Chris@82 1273 T3M = T3E + T3L;
Chris@82 1274 Cr[WS(csr, 29)] = T3x - T3M;
Chris@82 1275 Cr[WS(csr, 3)] = T3x + T3M;
Chris@82 1276 T3V = T3S + T3P;
Chris@82 1277 T3Y = T3W + T3X;
Chris@82 1278 Ci[WS(csi, 3)] = T3V + T3Y;
Chris@82 1279 Ci[WS(csi, 29)] = T3Y - T3V;
Chris@82 1280 }
Chris@82 1281 {
Chris@82 1282 E T3T, T3U, T3Z, T40;
Chris@82 1283 T3T = T3P - T3S;
Chris@82 1284 T3U = T3L - T3E;
Chris@82 1285 Ci[WS(csi, 13)] = T3T + T3U;
Chris@82 1286 Ci[WS(csi, 19)] = T3U - T3T;
Chris@82 1287 T3Z = T3p - T3w;
Chris@82 1288 T40 = T3X - T3W;
Chris@82 1289 Cr[WS(csr, 19)] = T3Z - T40;
Chris@82 1290 Cr[WS(csr, 13)] = T3Z + T40;
Chris@82 1291 }
Chris@82 1292 {
Chris@82 1293 E T43, T4a, T4f, T4i;
Chris@82 1294 T43 = T41 + T42;
Chris@82 1295 T4a = T46 + T49;
Chris@82 1296 Cr[WS(csr, 27)] = T43 - T4a;
Chris@82 1297 Cr[WS(csr, 5)] = T43 + T4a;
Chris@82 1298 T4f = T4c + T4b;
Chris@82 1299 T4i = T4g + T4h;
Chris@82 1300 Ci[WS(csi, 5)] = T4f + T4i;
Chris@82 1301 Ci[WS(csi, 27)] = T4i - T4f;
Chris@82 1302 }
Chris@82 1303 {
Chris@82 1304 E T4d, T4e, T4j, T4k;
Chris@82 1305 T4d = T4b - T4c;
Chris@82 1306 T4e = T49 - T46;
Chris@82 1307 Ci[WS(csi, 11)] = T4d + T4e;
Chris@82 1308 Ci[WS(csi, 21)] = T4e - T4d;
Chris@82 1309 T4j = T41 - T42;
Chris@82 1310 T4k = T4h - T4g;
Chris@82 1311 Cr[WS(csr, 21)] = T4j - T4k;
Chris@82 1312 Cr[WS(csr, 11)] = T4j + T4k;
Chris@82 1313 }
Chris@82 1314 }
Chris@82 1315 {
Chris@82 1316 E T1d, T33, T3e, T2U, T1w, T3d, T3b, T3j, T2N, T34, T28, T2Y, T38, T3i, T2J;
Chris@82 1317 E T2Z;
Chris@82 1318 {
Chris@82 1319 E T1m, T1v, T1Y, T27;
Chris@82 1320 T1d = T15 - T1c;
Chris@82 1321 T33 = T15 + T1c;
Chris@82 1322 T3e = T2T + T2Q;
Chris@82 1323 T2U = T2Q - T2T;
Chris@82 1324 T1m = FMA(KP195090322, T1i, KP980785280 * T1l);
Chris@82 1325 T1v = FNMS(KP195090322, T1u, KP980785280 * T1r);
Chris@82 1326 T1w = T1m - T1v;
Chris@82 1327 T3d = T1v + T1m;
Chris@82 1328 {
Chris@82 1329 E T39, T3a, T2L, T2M;
Chris@82 1330 T39 = T2j + T2y;
Chris@82 1331 T3a = T2H + T2C;
Chris@82 1332 T3b = FNMS(KP098017140, T3a, KP995184726 * T39);
Chris@82 1333 T3j = FMA(KP995184726, T3a, KP098017140 * T39);
Chris@82 1334 T2L = FNMS(KP195090322, T1l, KP980785280 * T1i);
Chris@82 1335 T2M = FMA(KP980785280, T1u, KP195090322 * T1r);
Chris@82 1336 T2N = T2L - T2M;
Chris@82 1337 T34 = T2M + T2L;
Chris@82 1338 }
Chris@82 1339 T1Y = T1M - T1X;
Chris@82 1340 T27 = T23 - T26;
Chris@82 1341 T28 = FMA(KP634393284, T1Y, KP773010453 * T27);
Chris@82 1342 T2Y = FNMS(KP634393284, T27, KP773010453 * T1Y);
Chris@82 1343 {
Chris@82 1344 E T36, T37, T2z, T2I;
Chris@82 1345 T36 = T1X + T1M;
Chris@82 1346 T37 = T23 + T26;
Chris@82 1347 T38 = FMA(KP098017140, T36, KP995184726 * T37);
Chris@82 1348 T3i = FNMS(KP098017140, T37, KP995184726 * T36);
Chris@82 1349 T2z = T2j - T2y;
Chris@82 1350 T2I = T2C - T2H;
Chris@82 1351 T2J = FNMS(KP634393284, T2I, KP773010453 * T2z);
Chris@82 1352 T2Z = FMA(KP773010453, T2I, KP634393284 * T2z);
Chris@82 1353 }
Chris@82 1354 }
Chris@82 1355 {
Chris@82 1356 E T1x, T2K, T2X, T30;
Chris@82 1357 T1x = T1d + T1w;
Chris@82 1358 T2K = T28 + T2J;
Chris@82 1359 Cr[WS(csr, 25)] = T1x - T2K;
Chris@82 1360 Cr[WS(csr, 7)] = T1x + T2K;
Chris@82 1361 T2X = T2U + T2N;
Chris@82 1362 T30 = T2Y + T2Z;
Chris@82 1363 Ci[WS(csi, 7)] = T2X + T30;
Chris@82 1364 Ci[WS(csi, 25)] = T30 - T2X;
Chris@82 1365 }
Chris@82 1366 {
Chris@82 1367 E T2V, T2W, T31, T32;
Chris@82 1368 T2V = T2N - T2U;
Chris@82 1369 T2W = T2J - T28;
Chris@82 1370 Ci[WS(csi, 9)] = T2V + T2W;
Chris@82 1371 Ci[WS(csi, 23)] = T2W - T2V;
Chris@82 1372 T31 = T1d - T1w;
Chris@82 1373 T32 = T2Z - T2Y;
Chris@82 1374 Cr[WS(csr, 23)] = T31 - T32;
Chris@82 1375 Cr[WS(csr, 9)] = T31 + T32;
Chris@82 1376 }
Chris@82 1377 {
Chris@82 1378 E T35, T3c, T3h, T3k;
Chris@82 1379 T35 = T33 + T34;
Chris@82 1380 T3c = T38 + T3b;
Chris@82 1381 Cr[WS(csr, 31)] = T35 - T3c;
Chris@82 1382 Cr[WS(csr, 1)] = T35 + T3c;
Chris@82 1383 T3h = T3e + T3d;
Chris@82 1384 T3k = T3i + T3j;
Chris@82 1385 Ci[WS(csi, 1)] = T3h + T3k;
Chris@82 1386 Ci[WS(csi, 31)] = T3k - T3h;
Chris@82 1387 }
Chris@82 1388 {
Chris@82 1389 E T3f, T3g, T3l, T3m;
Chris@82 1390 T3f = T3d - T3e;
Chris@82 1391 T3g = T3b - T38;
Chris@82 1392 Ci[WS(csi, 15)] = T3f + T3g;
Chris@82 1393 Ci[WS(csi, 17)] = T3g - T3f;
Chris@82 1394 T3l = T33 - T34;
Chris@82 1395 T3m = T3j - T3i;
Chris@82 1396 Cr[WS(csr, 17)] = T3l - T3m;
Chris@82 1397 Cr[WS(csr, 15)] = T3l + T3m;
Chris@82 1398 }
Chris@82 1399 }
Chris@82 1400 }
Chris@82 1401 }
Chris@82 1402 }
Chris@82 1403
/*
 * Descriptor for this codelet; its address is passed to X(kr2c_register)
 * by the registration function below.  The size (64) and the name
 * ("r2cf_64") match the -n / -name arguments of the genfft command line
 * recorded in this file.
 * NOTE(review): {342, 72, 52, 0} is presumably the operation-count record
 * for this variant of the kernel, and GENUS is supplied by an included
 * header -- confirm both against the kr2c_desc declaration.
 */
Chris@82 1404 static const kr2c_desc desc = { 64, "r2cf_64", {342, 72, 52, 0}, &GENUS };
Chris@82 1405
/*
 * Registration hook for this codelet: hands the r2cf_64 kernel function
 * and its descriptor `desc` to X(kr2c_register) for the planner `p`.
 * Returns nothing.
 * NOTE(review): X() is a project macro defined outside this file
 * (presumably a name-mangling prefix) -- confirm in the project headers.
 */
Chris@82 1406 void X(codelet_r2cf_64) (planner *p) {
Chris@82 1407 X(kr2c_register) (p, r2cf_64, &desc);
Chris@82 1408 }
Chris@82 1409
Chris@82 1410 #endif