annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cbIII_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:45 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@82 32 * (or, 238 additions, 64 multiplications, 196 fused multiply/add),
Chris@82 33 * 127 stack variables, 36 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cbIII.h"
Chris@82 36
/*
 * r2cbIII_64 (FMA variant): codelet emitted by genfft; see the
 * "Generated by" line above for the exact options (-n 64, -dft-III,
 * -sign 1, -fma).
 *
 * Each pass of the outer loop reads 32 values from Cr and 32 from Ci
 * (index 0 and WS(csr, k) / WS(csi, k) for k = 1..31) and stores 32 results
 * to R0 and 32 to R1 (index 0 and WS(rs, k) for k = 1..31) -- the
 * 128 memory accesses quoted in the generator's summary comment.
 *
 *   R0, R1    output arrays, indexed through WS(rs, k)
 *   Cr, Ci    input arrays, indexed through WS(csr, k) and WS(csi, k)
 *   v         number of loop passes
 *   ivs       added to Cr and Ci after every pass
 *   ovs       added to R0 and R1 after every pass
 *
 * NOTE(review): presumably Cr/Ci are the real/imaginary parts of the
 * spectrum and R0/R1 the even/odd real output samples, as the names
 * suggest -- confirm against rdft/codelet-rdft.h.
 * NOTE(review): E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are not
 * defined in this file; they come from the included headers.
 *
 * The arithmetic is machine-generated: regenerate with genfft rather
 * than editing by hand.
 */
Chris@82 37 static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* 36 named constants used by the arithmetic below (DK is a macro from the included headers). */
Chris@82 39 DK(KP906347169, +0.906347169019147157946142717268914412664134293);
Chris@82 40 DK(KP1_481902250, +1.481902250709918182351233794990325459457910619);
Chris@82 41 DK(KP472964775, +0.472964775891319928124438237972992463904131113);
Chris@82 42 DK(KP1_807978586, +1.807978586246886663172400594461074097420264050);
Chris@82 43 DK(KP049126849, +0.049126849769467254105343321271313617079695752);
Chris@82 44 DK(KP1_997590912, +1.997590912410344785429543209518201388886407229);
Chris@82 45 DK(KP357805721, +0.357805721314524104672487743774474392487532769);
Chris@82 46 DK(KP1_883088130, +1.883088130366041556825018805199004714371179592);
Chris@82 47 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 48 DK(KP741650546, +0.741650546272035369581266691172079863842265220);
Chris@82 49 DK(KP1_606415062, +1.606415062961289819613353025926283847759138854);
Chris@82 50 DK(KP599376933, +0.599376933681923766271389869014404232837890546);
Chris@82 51 DK(KP1_715457220, +1.715457220000544139804539968569540274084981599);
Chris@82 52 DK(KP148335987, +0.148335987538347428753676511486911367000625355);
Chris@82 53 DK(KP1_978353019, +1.978353019929561946903347476032486127967379067);
Chris@82 54 DK(KP250486960, +0.250486960191305461595702160124721208578685568);
Chris@82 55 DK(KP1_940062506, +1.940062506389087985207968414572200502913731924);
Chris@82 56 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 57 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@82 58 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@82 59 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@82 60 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@82 61 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@82 62 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@82 63 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@82 64 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@82 65 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 66 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 67 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@82 68 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 69 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@82 70 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@82 71 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@82 72 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 73 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 74 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 75 {
Chris@82 76 INT i;
/* v passes; after each one R0/R1 advance by ovs and Cr/Ci by ivs. */
Chris@82 77 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
Chris@82 78 E T15, T3t, T3U, T2E, Tf, T6b, T6u, T6Q, T4L, T5J, T1g, T3V, T5q, T5U, T2H;
Chris@82 79 E T3u, Tu, T6v, T4V, T5r, T6e, T6R, T1s, T2K, T1D, T2J, T3B, T3X, T4Q, T5s;
Chris@82 80 E T3y, T3Y, TK, T6g, T57, T5M, T6j, T6N, T1W, T35, T25, T34, T3J, T4i, T52;
Chris@82 81 E T5N, T3G, T4j, TZ, T6l, T5i, T5P, T6o, T6M, T2n, T38, T2w, T37, T3Q, T4l;
Chris@82 82 E T5d, T5Q, T3N, T4m;
/* Input stage 1: loads Cr/Ci at indices 0, 31, 16, 15, 8, 23, 7, 24 and combines them. */
Chris@82 83 {
Chris@82 84 E T3, T11, T2D, T5m, T6, T2A, T14, T5n, Ta, T16, T19, T4I, Td, T1b, T1e;
Chris@82 85 E T4J;
Chris@82 86 {
Chris@82 87 E T1, T2, T2B, T2C;
Chris@82 88 T1 = Cr[0];
Chris@82 89 T2 = Cr[WS(csr, 31)];
Chris@82 90 T3 = T1 + T2;
Chris@82 91 T11 = T1 - T2;
Chris@82 92 T2B = Ci[0];
Chris@82 93 T2C = Ci[WS(csi, 31)];
Chris@82 94 T2D = T2B + T2C;
Chris@82 95 T5m = T2C - T2B;
Chris@82 96 }
Chris@82 97 {
Chris@82 98 E T4, T5, T12, T13;
Chris@82 99 T4 = Cr[WS(csr, 16)];
Chris@82 100 T5 = Cr[WS(csr, 15)];
Chris@82 101 T6 = T4 + T5;
Chris@82 102 T2A = T4 - T5;
Chris@82 103 T12 = Ci[WS(csi, 16)];
Chris@82 104 T13 = Ci[WS(csi, 15)];
Chris@82 105 T14 = T12 + T13;
Chris@82 106 T5n = T13 - T12;
Chris@82 107 }
Chris@82 108 {
Chris@82 109 E T8, T9, T17, T18;
Chris@82 110 T8 = Cr[WS(csr, 8)];
Chris@82 111 T9 = Cr[WS(csr, 23)];
Chris@82 112 Ta = T8 + T9;
Chris@82 113 T16 = T8 - T9;
Chris@82 114 T17 = Ci[WS(csi, 8)];
Chris@82 115 T18 = Ci[WS(csi, 23)];
Chris@82 116 T19 = T17 + T18;
Chris@82 117 T4I = T18 - T17;
Chris@82 118 }
Chris@82 119 {
Chris@82 120 E Tb, Tc, T1c, T1d;
Chris@82 121 Tb = Cr[WS(csr, 7)];
Chris@82 122 Tc = Cr[WS(csr, 24)];
Chris@82 123 Td = Tb + Tc;
Chris@82 124 T1b = Tb - Tc;
Chris@82 125 T1c = Ci[WS(csi, 7)];
Chris@82 126 T1d = Ci[WS(csi, 24)];
Chris@82 127 T1e = T1c + T1d;
Chris@82 128 T4J = T1c - T1d;
Chris@82 129 }
Chris@82 130 {
Chris@82 131 E T7, Te, T1a, T1f;
Chris@82 132 T15 = T11 - T14;
Chris@82 133 T3t = T11 + T14;
Chris@82 134 T3U = T2A - T2D;
Chris@82 135 T2E = T2A + T2D;
Chris@82 136 T7 = T3 + T6;
Chris@82 137 Te = Ta + Td;
Chris@82 138 Tf = T7 + Te;
Chris@82 139 T6b = T7 - Te;
Chris@82 140 {
Chris@82 141 E T6s, T6t, T4H, T4K;
Chris@82 142 T6s = T5n + T5m;
Chris@82 143 T6t = T4I + T4J;
Chris@82 144 T6u = T6s - T6t;
Chris@82 145 T6Q = T6t + T6s;
Chris@82 146 T4H = T3 - T6;
Chris@82 147 T4K = T4I - T4J;
Chris@82 148 T4L = T4H + T4K;
Chris@82 149 T5J = T4H - T4K;
Chris@82 150 }
Chris@82 151 T1a = T16 - T19;
Chris@82 152 T1f = T1b - T1e;
Chris@82 153 T1g = T1a + T1f;
Chris@82 154 T3V = T1a - T1f;
Chris@82 155 {
Chris@82 156 E T5o, T5p, T2F, T2G;
Chris@82 157 T5o = T5m - T5n;
Chris@82 158 T5p = Ta - Td;
Chris@82 159 T5q = T5o - T5p;
Chris@82 160 T5U = T5p + T5o;
Chris@82 161 T2F = T1b + T1e;
Chris@82 162 T2G = T16 + T19;
Chris@82 163 T2H = T2F - T2G;
Chris@82 164 T3u = T2G + T2F;
Chris@82 165 }
Chris@82 166 }
Chris@82 167 }
/* Input stage 2: loads Cr/Ci at indices 4, 27, 20, 11, 3, 28, 12, 19 and combines them. */
Chris@82 168 {
Chris@82 169 E Ti, T1i, T1q, T4O, Tl, T1n, T1l, T4N, Tp, T1t, T1A, T4T, Ts, T1B, T1w;
Chris@82 170 E T4S;
Chris@82 171 {
Chris@82 172 E Tg, Th, T1o, T1p;
Chris@82 173 Tg = Cr[WS(csr, 4)];
Chris@82 174 Th = Cr[WS(csr, 27)];
Chris@82 175 Ti = Tg + Th;
Chris@82 176 T1i = Tg - Th;
Chris@82 177 T1o = Ci[WS(csi, 4)];
Chris@82 178 T1p = Ci[WS(csi, 27)];
Chris@82 179 T1q = T1o + T1p;
Chris@82 180 T4O = T1p - T1o;
Chris@82 181 }
Chris@82 182 {
Chris@82 183 E Tj, Tk, T1j, T1k;
Chris@82 184 Tj = Cr[WS(csr, 20)];
Chris@82 185 Tk = Cr[WS(csr, 11)];
Chris@82 186 Tl = Tj + Tk;
Chris@82 187 T1n = Tj - Tk;
Chris@82 188 T1j = Ci[WS(csi, 20)];
Chris@82 189 T1k = Ci[WS(csi, 11)];
Chris@82 190 T1l = T1j + T1k;
Chris@82 191 T4N = T1k - T1j;
Chris@82 192 }
Chris@82 193 {
Chris@82 194 E Tn, To, T1y, T1z;
Chris@82 195 Tn = Cr[WS(csr, 3)];
Chris@82 196 To = Cr[WS(csr, 28)];
Chris@82 197 Tp = Tn + To;
Chris@82 198 T1t = Tn - To;
Chris@82 199 T1y = Ci[WS(csi, 3)];
Chris@82 200 T1z = Ci[WS(csi, 28)];
Chris@82 201 T1A = T1y + T1z;
Chris@82 202 T4T = T1y - T1z;
Chris@82 203 }
Chris@82 204 {
Chris@82 205 E Tq, Tr, T1u, T1v;
Chris@82 206 Tq = Cr[WS(csr, 12)];
Chris@82 207 Tr = Cr[WS(csr, 19)];
Chris@82 208 Ts = Tq + Tr;
Chris@82 209 T1B = Tq - Tr;
Chris@82 210 T1u = Ci[WS(csi, 12)];
Chris@82 211 T1v = Ci[WS(csi, 19)];
Chris@82 212 T1w = T1u + T1v;
Chris@82 213 T4S = T1v - T1u;
Chris@82 214 }
Chris@82 215 {
Chris@82 216 E Tm, Tt, T4R, T4U;
Chris@82 217 Tm = Ti + Tl;
Chris@82 218 Tt = Tp + Ts;
Chris@82 219 Tu = Tm + Tt;
Chris@82 220 T6v = Tm - Tt;
Chris@82 221 T4R = Tp - Ts;
Chris@82 222 T4U = T4S - T4T;
Chris@82 223 T4V = T4R + T4U;
Chris@82 224 T5r = T4R - T4U;
Chris@82 225 }
Chris@82 226 {
Chris@82 227 E T6c, T6d, T1m, T1r;
Chris@82 228 T6c = T4N + T4O;
Chris@82 229 T6d = T4S + T4T;
Chris@82 230 T6e = T6c - T6d;
Chris@82 231 T6R = T6c + T6d;
Chris@82 232 T1m = T1i - T1l;
Chris@82 233 T1r = T1n + T1q;
Chris@82 234 T1s = FNMS(KP414213562, T1r, T1m);
Chris@82 235 T2K = FMA(KP414213562, T1m, T1r);
Chris@82 236 }
Chris@82 237 {
Chris@82 238 E T1x, T1C, T3z, T3A;
Chris@82 239 T1x = T1t - T1w;
Chris@82 240 T1C = T1A - T1B;
Chris@82 241 T1D = FNMS(KP414213562, T1C, T1x);
Chris@82 242 T2J = FMA(KP414213562, T1x, T1C);
Chris@82 243 T3z = T1B + T1A;
Chris@82 244 T3A = T1t + T1w;
Chris@82 245 T3B = FNMS(KP414213562, T3A, T3z);
Chris@82 246 T3X = FMA(KP414213562, T3z, T3A);
Chris@82 247 }
Chris@82 248 {
Chris@82 249 E T4M, T4P, T3w, T3x;
Chris@82 250 T4M = Ti - Tl;
Chris@82 251 T4P = T4N - T4O;
Chris@82 252 T4Q = T4M - T4P;
Chris@82 253 T5s = T4M + T4P;
Chris@82 254 T3w = T1n - T1q;
Chris@82 255 T3x = T1i + T1l;
Chris@82 256 T3y = FMA(KP414213562, T3x, T3w);
Chris@82 257 T3Y = FNMS(KP414213562, T3w, T3x);
Chris@82 258 }
Chris@82 259 }
/* Input stage 3: loads Cr/Ci at indices 2, 29, 18, 13, 5, 26, 10, 21 and combines them. */
Chris@82 260 {
Chris@82 261 E Ty, T1G, T20, T54, TB, T1X, T1J, T53, TI, T4Z, T1U, T22, TF, T50, T1P;
Chris@82 262 E T23;
Chris@82 263 {
Chris@82 264 E Tw, Tx, T1H, T1I;
Chris@82 265 Tw = Cr[WS(csr, 2)];
Chris@82 266 Tx = Cr[WS(csr, 29)];
Chris@82 267 Ty = Tw + Tx;
Chris@82 268 T1G = Tw - Tx;
Chris@82 269 {
Chris@82 270 E T1Y, T1Z, Tz, TA;
Chris@82 271 T1Y = Ci[WS(csi, 2)];
Chris@82 272 T1Z = Ci[WS(csi, 29)];
Chris@82 273 T20 = T1Y + T1Z;
Chris@82 274 T54 = T1Y - T1Z;
Chris@82 275 Tz = Cr[WS(csr, 18)];
Chris@82 276 TA = Cr[WS(csr, 13)];
Chris@82 277 TB = Tz + TA;
Chris@82 278 T1X = Tz - TA;
Chris@82 279 }
Chris@82 280 T1H = Ci[WS(csi, 18)];
Chris@82 281 T1I = Ci[WS(csi, 13)];
Chris@82 282 T1J = T1H + T1I;
Chris@82 283 T53 = T1H - T1I;
Chris@82 284 {
Chris@82 285 E TG, TH, T1Q, T1R, T1S, T1T;
Chris@82 286 TG = Cr[WS(csr, 5)];
Chris@82 287 TH = Cr[WS(csr, 26)];
Chris@82 288 T1Q = TG - TH;
Chris@82 289 T1R = Ci[WS(csi, 5)];
Chris@82 290 T1S = Ci[WS(csi, 26)];
Chris@82 291 T1T = T1R + T1S;
Chris@82 292 TI = TG + TH;
Chris@82 293 T4Z = T1S - T1R;
Chris@82 294 T1U = T1Q - T1T;
Chris@82 295 T22 = T1Q + T1T;
Chris@82 296 }
Chris@82 297 {
Chris@82 298 E TD, TE, T1L, T1M, T1N, T1O;
Chris@82 299 TD = Cr[WS(csr, 10)];
Chris@82 300 TE = Cr[WS(csr, 21)];
Chris@82 301 T1L = TD - TE;
Chris@82 302 T1M = Ci[WS(csi, 10)];
Chris@82 303 T1N = Ci[WS(csi, 21)];
Chris@82 304 T1O = T1M + T1N;
Chris@82 305 TF = TD + TE;
Chris@82 306 T50 = T1M - T1N;
Chris@82 307 T1P = T1L - T1O;
Chris@82 308 T23 = T1L + T1O;
Chris@82 309 }
Chris@82 310 }
Chris@82 311 {
Chris@82 312 E TC, TJ, T55, T56;
Chris@82 313 TC = Ty + TB;
Chris@82 314 TJ = TF + TI;
Chris@82 315 TK = TC + TJ;
Chris@82 316 T6g = TC - TJ;
Chris@82 317 T55 = T53 - T54;
Chris@82 318 T56 = TF - TI;
Chris@82 319 T57 = T55 - T56;
Chris@82 320 T5M = T56 + T55;
Chris@82 321 }
Chris@82 322 {
Chris@82 323 E T6h, T6i, T1K, T1V;
Chris@82 324 T6h = T53 + T54;
Chris@82 325 T6i = T50 + T4Z;
Chris@82 326 T6j = T6h - T6i;
Chris@82 327 T6N = T6i + T6h;
Chris@82 328 T1K = T1G - T1J;
Chris@82 329 T1V = T1P + T1U;
Chris@82 330 T1W = FMA(KP707106781, T1V, T1K);
Chris@82 331 T35 = FNMS(KP707106781, T1V, T1K);
Chris@82 332 }
Chris@82 333 {
Chris@82 334 E T21, T24, T3H, T3I;
Chris@82 335 T21 = T1X + T20;
Chris@82 336 T24 = T22 - T23;
Chris@82 337 T25 = FNMS(KP707106781, T24, T21);
Chris@82 338 T34 = FMA(KP707106781, T24, T21);
Chris@82 339 T3H = T1X - T20;
Chris@82 340 T3I = T1P - T1U;
Chris@82 341 T3J = FNMS(KP707106781, T3I, T3H);
Chris@82 342 T4i = FMA(KP707106781, T3I, T3H);
Chris@82 343 }
Chris@82 344 {
Chris@82 345 E T4Y, T51, T3E, T3F;
Chris@82 346 T4Y = Ty - TB;
Chris@82 347 T51 = T4Z - T50;
Chris@82 348 T52 = T4Y + T51;
Chris@82 349 T5N = T4Y - T51;
Chris@82 350 T3E = T1G + T1J;
Chris@82 351 T3F = T23 + T22;
Chris@82 352 T3G = FNMS(KP707106781, T3F, T3E);
Chris@82 353 T4j = FMA(KP707106781, T3F, T3E);
Chris@82 354 }
Chris@82 355 }
/* Input stage 4: loads Cr/Ci at indices 1, 30, 14, 17, 9, 22, 6, 25 and combines them. */
Chris@82 356 {
Chris@82 357 E TN, T27, T2q, T5f, TQ, T2r, T2a, T5e, TX, T5a, T2l, T2t, TU, T5b, T2g;
Chris@82 358 E T2u;
Chris@82 359 {
Chris@82 360 E TL, TM, T28, T29;
Chris@82 361 TL = Cr[WS(csr, 1)];
Chris@82 362 TM = Cr[WS(csr, 30)];
Chris@82 363 TN = TL + TM;
Chris@82 364 T27 = TL - TM;
Chris@82 365 {
Chris@82 366 E T2o, T2p, TO, TP;
Chris@82 367 T2o = Ci[WS(csi, 1)];
Chris@82 368 T2p = Ci[WS(csi, 30)];
Chris@82 369 T2q = T2o + T2p;
Chris@82 370 T5f = T2p - T2o;
Chris@82 371 TO = Cr[WS(csr, 14)];
Chris@82 372 TP = Cr[WS(csr, 17)];
Chris@82 373 TQ = TO + TP;
Chris@82 374 T2r = TO - TP;
Chris@82 375 }
Chris@82 376 T28 = Ci[WS(csi, 14)];
Chris@82 377 T29 = Ci[WS(csi, 17)];
Chris@82 378 T2a = T28 + T29;
Chris@82 379 T5e = T28 - T29;
Chris@82 380 {
Chris@82 381 E TV, TW, T2h, T2i, T2j, T2k;
Chris@82 382 TV = Cr[WS(csr, 9)];
Chris@82 383 TW = Cr[WS(csr, 22)];
Chris@82 384 T2h = TV - TW;
Chris@82 385 T2i = Ci[WS(csi, 9)];
Chris@82 386 T2j = Ci[WS(csi, 22)];
Chris@82 387 T2k = T2i + T2j;
Chris@82 388 TX = TV + TW;
Chris@82 389 T5a = T2j - T2i;
Chris@82 390 T2l = T2h - T2k;
Chris@82 391 T2t = T2h + T2k;
Chris@82 392 }
Chris@82 393 {
Chris@82 394 E TS, TT, T2c, T2d, T2e, T2f;
Chris@82 395 TS = Cr[WS(csr, 6)];
Chris@82 396 TT = Cr[WS(csr, 25)];
Chris@82 397 T2c = TS - TT;
Chris@82 398 T2d = Ci[WS(csi, 6)];
Chris@82 399 T2e = Ci[WS(csi, 25)];
Chris@82 400 T2f = T2d + T2e;
Chris@82 401 TU = TS + TT;
Chris@82 402 T5b = T2d - T2e;
Chris@82 403 T2g = T2c - T2f;
Chris@82 404 T2u = T2c + T2f;
Chris@82 405 }
Chris@82 406 }
Chris@82 407 {
Chris@82 408 E TR, TY, T5g, T5h;
Chris@82 409 TR = TN + TQ;
Chris@82 410 TY = TU + TX;
Chris@82 411 TZ = TR + TY;
Chris@82 412 T6l = TR - TY;
Chris@82 413 T5g = T5e - T5f;
Chris@82 414 T5h = TU - TX;
Chris@82 415 T5i = T5g - T5h;
Chris@82 416 T5P = T5h + T5g;
Chris@82 417 }
Chris@82 418 {
Chris@82 419 E T6m, T6n, T2b, T2m;
Chris@82 420 T6m = T5e + T5f;
Chris@82 421 T6n = T5b + T5a;
Chris@82 422 T6o = T6m - T6n;
Chris@82 423 T6M = T6n + T6m;
Chris@82 424 T2b = T27 - T2a;
Chris@82 425 T2m = T2g + T2l;
Chris@82 426 T2n = FMA(KP707106781, T2m, T2b);
Chris@82 427 T38 = FNMS(KP707106781, T2m, T2b);
Chris@82 428 }
Chris@82 429 {
Chris@82 430 E T2s, T2v, T3O, T3P;
Chris@82 431 T2s = T2q - T2r;
Chris@82 432 T2v = T2t - T2u;
Chris@82 433 T2w = FMA(KP707106781, T2v, T2s);
Chris@82 434 T37 = FNMS(KP707106781, T2v, T2s);
Chris@82 435 T3O = T2r + T2q;
Chris@82 436 T3P = T2g - T2l;
Chris@82 437 T3Q = FNMS(KP707106781, T3P, T3O);
Chris@82 438 T4l = FMA(KP707106781, T3P, T3O);
Chris@82 439 }
Chris@82 440 {
Chris@82 441 E T59, T5c, T3L, T3M;
Chris@82 442 T59 = TN - TQ;
Chris@82 443 T5c = T5a - T5b;
Chris@82 444 T5d = T59 + T5c;
Chris@82 445 T5Q = T59 - T5c;
Chris@82 446 T3L = T27 + T2a;
Chris@82 447 T3M = T2u + T2t;
Chris@82 448 T3N = FNMS(KP707106781, T3M, T3L);
Chris@82 449 T4m = FMA(KP707106781, T3M, T3L);
Chris@82 450 }
Chris@82 451 }
/* Output: R0 at indices 0, 16, 8, 24. */
Chris@82 452 {
Chris@82 453 E Tv, T10, T6X, T6Y, T6Z, T70;
Chris@82 454 Tv = Tf + Tu;
Chris@82 455 T10 = TK + TZ;
Chris@82 456 T6X = Tv - T10;
Chris@82 457 T6Y = T6N + T6M;
Chris@82 458 T6Z = T6R + T6Q;
Chris@82 459 T70 = T6Y + T6Z;
Chris@82 460 R0[0] = KP2_000000000 * (Tv + T10);
Chris@82 461 R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y);
Chris@82 462 R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70);
Chris@82 463 R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X);
Chris@82 464 }
/* Output: R0 at indices 4, 28, 20, 12. */
Chris@82 465 {
Chris@82 466 E T6P, T6W, T6U, T6V;
Chris@82 467 {
Chris@82 468 E T6L, T6O, T6S, T6T;
Chris@82 469 T6L = Tf - Tu;
Chris@82 470 T6O = T6M - T6N;
Chris@82 471 T6P = T6L + T6O;
Chris@82 472 T6W = T6L - T6O;
Chris@82 473 T6S = T6Q - T6R;
Chris@82 474 T6T = TK - TZ;
Chris@82 475 T6U = T6S - T6T;
Chris@82 476 T6V = T6T + T6S;
Chris@82 477 }
Chris@82 478 R0[WS(rs, 4)] = KP1_847759065 * (FMA(KP414213562, T6U, T6P));
Chris@82 479 R0[WS(rs, 28)] = -(KP1_847759065 * (FNMS(KP414213562, T6V, T6W)));
Chris@82 480 R0[WS(rs, 20)] = KP1_847759065 * (FNMS(KP414213562, T6P, T6U));
Chris@82 481 R0[WS(rs, 12)] = KP1_847759065 * (FMA(KP414213562, T6W, T6V));
Chris@82 482 }
/* Output: R0 at indices 2, 18, 14, 30, 10, 26, 6, 22. */
Chris@82 483 {
Chris@82 484 E T6f, T6w, T6G, T6D, T6z, T6E, T6q, T6H;
Chris@82 485 T6f = T6b + T6e;
Chris@82 486 T6w = T6u - T6v;
Chris@82 487 T6G = T6v + T6u;
Chris@82 488 T6D = T6b - T6e;
Chris@82 489 {
Chris@82 490 E T6x, T6y, T6k, T6p;
Chris@82 491 T6x = T6l - T6o;
Chris@82 492 T6y = T6g + T6j;
Chris@82 493 T6z = T6x - T6y;
Chris@82 494 T6E = T6y + T6x;
Chris@82 495 T6k = T6g - T6j;
Chris@82 496 T6p = T6l + T6o;
Chris@82 497 T6q = T6k + T6p;
Chris@82 498 T6H = T6k - T6p;
Chris@82 499 }
Chris@82 500 {
Chris@82 501 E T6r, T6A, T6J, T6K;
Chris@82 502 T6r = FMA(KP707106781, T6q, T6f);
Chris@82 503 T6A = FMA(KP707106781, T6z, T6w);
Chris@82 504 R0[WS(rs, 2)] = KP1_961570560 * (FMA(KP198912367, T6A, T6r));
Chris@82 505 R0[WS(rs, 18)] = KP1_961570560 * (FNMS(KP198912367, T6r, T6A));
Chris@82 506 T6J = FMA(KP707106781, T6H, T6G);
Chris@82 507 T6K = FMA(KP707106781, T6E, T6D);
Chris@82 508 R0[WS(rs, 14)] = KP1_961570560 * (FMA(KP198912367, T6K, T6J));
Chris@82 509 R0[WS(rs, 30)] = -(KP1_961570560 * (FNMS(KP198912367, T6J, T6K)));
Chris@82 510 }
Chris@82 511 {
Chris@82 512 E T6B, T6C, T6F, T6I;
Chris@82 513 T6B = FNMS(KP707106781, T6z, T6w);
Chris@82 514 T6C = FNMS(KP707106781, T6q, T6f);
Chris@82 515 R0[WS(rs, 10)] = KP1_662939224 * (FMA(KP668178637, T6C, T6B));
Chris@82 516 R0[WS(rs, 26)] = -(KP1_662939224 * (FNMS(KP668178637, T6B, T6C)));
Chris@82 517 T6F = FNMS(KP707106781, T6E, T6D);
Chris@82 518 T6I = FNMS(KP707106781, T6H, T6G);
Chris@82 519 R0[WS(rs, 6)] = KP1_662939224 * (FMA(KP668178637, T6I, T6F));
Chris@82 520 R0[WS(rs, 22)] = KP1_662939224 * (FNMS(KP668178637, T6F, T6I));
Chris@82 521 }
Chris@82 522 }
/* Output: R0 at indices 3, 19, 15, 31, 11, 27, 7, 23. */
Chris@82 523 {
Chris@82 524 E T5L, T63, T5W, T66, T5S, T67, T5Z, T64, T5K, T5V;
Chris@82 525 T5K = T5s + T5r;
Chris@82 526 T5L = FNMS(KP707106781, T5K, T5J);
Chris@82 527 T63 = FMA(KP707106781, T5K, T5J);
Chris@82 528 T5V = T4Q - T4V;
Chris@82 529 T5W = FNMS(KP707106781, T5V, T5U);
Chris@82 530 T66 = FMA(KP707106781, T5V, T5U);
Chris@82 531 {
Chris@82 532 E T5O, T5R, T5X, T5Y;
Chris@82 533 T5O = FMA(KP414213562, T5N, T5M);
Chris@82 534 T5R = FNMS(KP414213562, T5Q, T5P);
Chris@82 535 T5S = T5O - T5R;
Chris@82 536 T67 = T5O + T5R;
Chris@82 537 T5X = FMA(KP414213562, T5P, T5Q);
Chris@82 538 T5Y = FNMS(KP414213562, T5M, T5N);
Chris@82 539 T5Z = T5X - T5Y;
Chris@82 540 T64 = T5Y + T5X;
Chris@82 541 }
Chris@82 542 {
Chris@82 543 E T5T, T60, T69, T6a;
Chris@82 544 T5T = FMA(KP923879532, T5S, T5L);
Chris@82 545 T60 = FMA(KP923879532, T5Z, T5W);
Chris@82 546 R0[WS(rs, 3)] = KP1_913880671 * (FMA(KP303346683, T60, T5T));
Chris@82 547 R0[WS(rs, 19)] = KP1_913880671 * (FNMS(KP303346683, T5T, T60));
Chris@82 548 T69 = FMA(KP923879532, T67, T66);
Chris@82 549 T6a = FMA(KP923879532, T64, T63);
Chris@82 550 R0[WS(rs, 15)] = KP1_990369453 * (FMA(KP098491403, T6a, T69));
Chris@82 551 R0[WS(rs, 31)] = -(KP1_990369453 * (FNMS(KP098491403, T69, T6a)));
Chris@82 552 }
Chris@82 553 {
Chris@82 554 E T61, T62, T65, T68;
Chris@82 555 T61 = FNMS(KP923879532, T5Z, T5W);
Chris@82 556 T62 = FNMS(KP923879532, T5S, T5L);
Chris@82 557 R0[WS(rs, 11)] = KP1_763842528 * (FMA(KP534511135, T62, T61));
Chris@82 558 R0[WS(rs, 27)] = -(KP1_763842528 * (FNMS(KP534511135, T61, T62)));
Chris@82 559 T65 = FNMS(KP923879532, T64, T63);
Chris@82 560 T68 = FNMS(KP923879532, T67, T66);
Chris@82 561 R0[WS(rs, 7)] = KP1_546020906 * (FMA(KP820678790, T68, T65));
Chris@82 562 R0[WS(rs, 23)] = KP1_546020906 * (FNMS(KP820678790, T65, T68));
Chris@82 563 }
Chris@82 564 }
/* Output: R0 at indices 1, 17, 13, 29, 9, 25, 5, 21. */
Chris@82 565 {
Chris@82 566 E T4X, T5B, T5u, T5E, T5k, T5F, T5x, T5C, T4W, T5t;
Chris@82 567 T4W = T4Q + T4V;
Chris@82 568 T4X = FMA(KP707106781, T4W, T4L);
Chris@82 569 T5B = FNMS(KP707106781, T4W, T4L);
Chris@82 570 T5t = T5r - T5s;
Chris@82 571 T5u = FMA(KP707106781, T5t, T5q);
Chris@82 572 T5E = FNMS(KP707106781, T5t, T5q);
Chris@82 573 {
Chris@82 574 E T58, T5j, T5v, T5w;
Chris@82 575 T58 = FMA(KP414213562, T57, T52);
Chris@82 576 T5j = FNMS(KP414213562, T5i, T5d);
Chris@82 577 T5k = T58 + T5j;
Chris@82 578 T5F = T58 - T5j;
Chris@82 579 T5v = FNMS(KP414213562, T52, T57);
Chris@82 580 T5w = FMA(KP414213562, T5d, T5i);
Chris@82 581 T5x = T5v + T5w;
Chris@82 582 T5C = T5w - T5v;
Chris@82 583 }
Chris@82 584 {
Chris@82 585 E T5l, T5y, T5H, T5I;
Chris@82 586 T5l = FMA(KP923879532, T5k, T4X);
Chris@82 587 T5y = FMA(KP923879532, T5x, T5u);
Chris@82 588 R0[WS(rs, 1)] = KP1_990369453 * (FMA(KP098491403, T5y, T5l));
Chris@82 589 R0[WS(rs, 17)] = KP1_990369453 * (FNMS(KP098491403, T5l, T5y));
Chris@82 590 T5H = FMA(KP923879532, T5F, T5E);
Chris@82 591 T5I = FMA(KP923879532, T5C, T5B);
Chris@82 592 R0[WS(rs, 13)] = KP1_913880671 * (FMA(KP303346683, T5I, T5H));
Chris@82 593 R0[WS(rs, 29)] = -(KP1_913880671 * (FNMS(KP303346683, T5H, T5I)));
Chris@82 594 }
Chris@82 595 {
Chris@82 596 E T5z, T5A, T5D, T5G;
Chris@82 597 T5z = FNMS(KP923879532, T5x, T5u);
Chris@82 598 T5A = FNMS(KP923879532, T5k, T4X);
Chris@82 599 R0[WS(rs, 9)] = KP1_546020906 * (FMA(KP820678790, T5A, T5z));
Chris@82 600 R0[WS(rs, 25)] = -(KP1_546020906 * (FNMS(KP820678790, T5z, T5A)));
Chris@82 601 T5D = FNMS(KP923879532, T5C, T5B);
Chris@82 602 T5G = FNMS(KP923879532, T5F, T5E);
Chris@82 603 R0[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T5G, T5D));
Chris@82 604 R0[WS(rs, 21)] = KP1_763842528 * (FNMS(KP534511135, T5D, T5G));
Chris@82 605 }
Chris@82 606 }
/* Output: R1 at indices 2, 18, 14, 30, 10, 26, 6, 22. */
Chris@82 607 {
Chris@82 608 E T33, T3l, T3h, T3m, T3a, T3p, T3e, T3o;
Chris@82 609 {
Chris@82 610 E T31, T32, T3f, T3g;
Chris@82 611 T31 = FNMS(KP707106781, T1g, T15);
Chris@82 612 T32 = T2K + T2J;
Chris@82 613 T33 = FNMS(KP923879532, T32, T31);
Chris@82 614 T3l = FMA(KP923879532, T32, T31);
Chris@82 615 T3f = FMA(KP668178637, T37, T38);
Chris@82 616 T3g = FMA(KP668178637, T34, T35);
Chris@82 617 T3h = T3f - T3g;
Chris@82 618 T3m = T3g + T3f;
Chris@82 619 }
Chris@82 620 {
Chris@82 621 E T36, T39, T3c, T3d;
Chris@82 622 T36 = FNMS(KP668178637, T35, T34);
Chris@82 623 T39 = FNMS(KP668178637, T38, T37);
Chris@82 624 T3a = T36 + T39;
Chris@82 625 T3p = T39 - T36;
Chris@82 626 T3c = FMA(KP707106781, T2H, T2E);
Chris@82 627 T3d = T1s - T1D;
Chris@82 628 T3e = FMA(KP923879532, T3d, T3c);
Chris@82 629 T3o = FNMS(KP923879532, T3d, T3c);
Chris@82 630 }
Chris@82 631 {
Chris@82 632 E T3b, T3i, T3r, T3s;
Chris@82 633 T3b = FNMS(KP831469612, T3a, T33);
Chris@82 634 T3i = FNMS(KP831469612, T3h, T3e);
Chris@82 635 R1[WS(rs, 2)] = KP1_940062506 * (FNMS(KP250486960, T3i, T3b));
Chris@82 636 R1[WS(rs, 18)] = -(KP1_940062506 * (FMA(KP250486960, T3b, T3i)));
Chris@82 637 T3r = FNMS(KP831469612, T3p, T3o);
Chris@82 638 T3s = FMA(KP831469612, T3m, T3l);
Chris@82 639 R1[WS(rs, 14)] = -(KP1_978353019 * (FNMS(KP148335987, T3s, T3r)));
Chris@82 640 R1[WS(rs, 30)] = -(KP1_978353019 * (FMA(KP148335987, T3r, T3s)));
Chris@82 641 }
Chris@82 642 {
Chris@82 643 E T3j, T3k, T3n, T3q;
Chris@82 644 T3j = FMA(KP831469612, T3h, T3e);
Chris@82 645 T3k = FMA(KP831469612, T3a, T33);
Chris@82 646 R1[WS(rs, 10)] = -(KP1_715457220 * (FNMS(KP599376933, T3k, T3j)));
Chris@82 647 R1[WS(rs, 26)] = -(KP1_715457220 * (FMA(KP599376933, T3j, T3k)));
Chris@82 648 T3n = FNMS(KP831469612, T3m, T3l);
Chris@82 649 T3q = FMA(KP831469612, T3p, T3o);
Chris@82 650 R1[WS(rs, 6)] = KP1_606415062 * (FNMS(KP741650546, T3q, T3n));
Chris@82 651 R1[WS(rs, 22)] = -(KP1_606415062 * (FMA(KP741650546, T3n, T3q)));
Chris@82 652 }
Chris@82 653 }
/* Output: R1 at indices 3, 19, 15, 31, 11, 27, 7, 23. */
Chris@82 654 {
Chris@82 655 E T4h, T4z, T4v, T4A, T4o, T4D, T4s, T4C;
Chris@82 656 {
Chris@82 657 E T4f, T4g, T4t, T4u;
Chris@82 658 T4f = FMA(KP707106781, T3u, T3t);
Chris@82 659 T4g = T3Y + T3X;
Chris@82 660 T4h = FNMS(KP923879532, T4g, T4f);
Chris@82 661 T4z = FMA(KP923879532, T4g, T4f);
Chris@82 662 T4t = FMA(KP198912367, T4l, T4m);
Chris@82 663 T4u = FNMS(KP198912367, T4i, T4j);
Chris@82 664 T4v = T4t - T4u;
Chris@82 665 T4A = T4u + T4t;
Chris@82 666 }
Chris@82 667 {
Chris@82 668 E T4k, T4n, T4q, T4r;
Chris@82 669 T4k = FMA(KP198912367, T4j, T4i);
Chris@82 670 T4n = FNMS(KP198912367, T4m, T4l);
Chris@82 671 T4o = T4k - T4n;
Chris@82 672 T4D = T4k + T4n;
Chris@82 673 T4q = FMA(KP707106781, T3V, T3U);
Chris@82 674 T4r = T3y + T3B;
Chris@82 675 T4s = FNMS(KP923879532, T4r, T4q);
Chris@82 676 T4C = FMA(KP923879532, T4r, T4q);
Chris@82 677 }
Chris@82 678 {
Chris@82 679 E T4p, T4w, T4F, T4G;
Chris@82 680 T4p = FMA(KP980785280, T4o, T4h);
Chris@82 681 T4w = FMA(KP980785280, T4v, T4s);
Chris@82 682 R1[WS(rs, 3)] = KP1_883088130 * (FMA(KP357805721, T4w, T4p));
Chris@82 683 R1[WS(rs, 19)] = KP1_883088130 * (FNMS(KP357805721, T4p, T4w));
Chris@82 684 T4F = FMA(KP980785280, T4D, T4C);
Chris@82 685 T4G = FMA(KP980785280, T4A, T4z);
Chris@82 686 R1[WS(rs, 15)] = KP1_997590912 * (FMA(KP049126849, T4G, T4F));
Chris@82 687 R1[WS(rs, 31)] = -(KP1_997590912 * (FNMS(KP049126849, T4F, T4G)));
Chris@82 688 }
Chris@82 689 {
Chris@82 690 E T4x, T4y, T4B, T4E;
Chris@82 691 T4x = FNMS(KP980785280, T4v, T4s);
Chris@82 692 T4y = FNMS(KP980785280, T4o, T4h);
Chris@82 693 R1[WS(rs, 11)] = KP1_807978586 * (FMA(KP472964775, T4y, T4x));
Chris@82 694 R1[WS(rs, 27)] = -(KP1_807978586 * (FNMS(KP472964775, T4x, T4y)));
Chris@82 695 T4B = FNMS(KP980785280, T4A, T4z);
Chris@82 696 T4E = FNMS(KP980785280, T4D, T4C);
Chris@82 697 R1[WS(rs, 7)] = KP1_481902250 * (FMA(KP906347169, T4E, T4B));
Chris@82 698 R1[WS(rs, 23)] = KP1_481902250 * (FNMS(KP906347169, T4B, T4E));
Chris@82 699 }
Chris@82 700 }
/* Output: R1 at indices 0, 16, 12, 28, 8, 24, 4, 20. */
Chris@82 701 {
Chris@82 702 E T1F, T2T, T2P, T2U, T2y, T2X, T2M, T2W;
Chris@82 703 {
Chris@82 704 E T1h, T1E, T2N, T2O;
Chris@82 705 T1h = FMA(KP707106781, T1g, T15);
Chris@82 706 T1E = T1s + T1D;
Chris@82 707 T1F = FMA(KP923879532, T1E, T1h);
Chris@82 708 T2T = FNMS(KP923879532, T1E, T1h);
Chris@82 709 T2N = FMA(KP198912367, T2n, T2w);
Chris@82 710 T2O = FMA(KP198912367, T1W, T25);
Chris@82 711 T2P = T2N - T2O;
Chris@82 712 T2U = T2O + T2N;
Chris@82 713 }
Chris@82 714 {
Chris@82 715 E T26, T2x, T2I, T2L;
Chris@82 716 T26 = FNMS(KP198912367, T25, T1W);
Chris@82 717 T2x = FNMS(KP198912367, T2w, T2n);
Chris@82 718 T2y = T26 + T2x;
Chris@82 719 T2X = T26 - T2x;
Chris@82 720 T2I = FNMS(KP707106781, T2H, T2E);
Chris@82 721 T2L = T2J - T2K;
Chris@82 722 T2M = FNMS(KP923879532, T2L, T2I);
Chris@82 723 T2W = FMA(KP923879532, T2L, T2I);
Chris@82 724 }
Chris@82 725 {
Chris@82 726 E T2z, T2Q, T2Z, T30;
Chris@82 727 T2z = FMA(KP980785280, T2y, T1F);
Chris@82 728 T2Q = FNMS(KP980785280, T2P, T2M);
Chris@82 729 R1[0] = KP1_997590912 * (FNMS(KP049126849, T2Q, T2z));
Chris@82 730 R1[WS(rs, 16)] = -(KP1_997590912 * (FMA(KP049126849, T2z, T2Q)));
Chris@82 731 T2Z = FNMS(KP980785280, T2X, T2W);
Chris@82 732 T30 = FMA(KP980785280, T2U, T2T);
Chris@82 733 R1[WS(rs, 12)] = -(KP1_883088130 * (FNMS(KP357805721, T30, T2Z)));
Chris@82 734 R1[WS(rs, 28)] = -(KP1_883088130 * (FMA(KP357805721, T2Z, T30)));
Chris@82 735 }
Chris@82 736 {
Chris@82 737 E T2R, T2S, T2V, T2Y;
Chris@82 738 T2R = FMA(KP980785280, T2P, T2M);
Chris@82 739 T2S = FNMS(KP980785280, T2y, T1F);
Chris@82 740 R1[WS(rs, 8)] = -(KP1_481902250 * (FNMS(KP906347169, T2S, T2R)));
Chris@82 741 R1[WS(rs, 24)] = -(KP1_481902250 * (FMA(KP906347169, T2R, T2S)));
Chris@82 742 T2V = FNMS(KP980785280, T2U, T2T);
Chris@82 743 T2Y = FMA(KP980785280, T2X, T2W);
Chris@82 744 R1[WS(rs, 4)] = KP1_807978586 * (FNMS(KP472964775, T2Y, T2V));
Chris@82 745 R1[WS(rs, 20)] = -(KP1_807978586 * (FMA(KP472964775, T2V, T2Y)));
Chris@82 746 }
Chris@82 747 }
/* Output: R1 at indices 1, 17, 13, 29, 9, 25, 5, 21. */
Chris@82 748 {
Chris@82 749 E T3D, T47, T43, T48, T3S, T4b, T40, T4a;
Chris@82 750 {
Chris@82 751 E T3v, T3C, T41, T42;
Chris@82 752 T3v = FNMS(KP707106781, T3u, T3t);
Chris@82 753 T3C = T3y - T3B;
Chris@82 754 T3D = FMA(KP923879532, T3C, T3v);
Chris@82 755 T47 = FNMS(KP923879532, T3C, T3v);
Chris@82 756 T41 = FNMS(KP668178637, T3G, T3J);
Chris@82 757 T42 = FMA(KP668178637, T3N, T3Q);
Chris@82 758 T43 = T41 + T42;
Chris@82 759 T48 = T42 - T41;
Chris@82 760 }
Chris@82 761 {
Chris@82 762 E T3K, T3R, T3W, T3Z;
Chris@82 763 T3K = FMA(KP668178637, T3J, T3G);
Chris@82 764 T3R = FNMS(KP668178637, T3Q, T3N);
Chris@82 765 T3S = T3K + T3R;
Chris@82 766 T4b = T3K - T3R;
Chris@82 767 T3W = FNMS(KP707106781, T3V, T3U);
Chris@82 768 T3Z = T3X - T3Y;
Chris@82 769 T40 = FMA(KP923879532, T3Z, T3W);
Chris@82 770 T4a = FNMS(KP923879532, T3Z, T3W);
Chris@82 771 }
Chris@82 772 {
Chris@82 773 E T3T, T44, T4d, T4e;
Chris@82 774 T3T = FMA(KP831469612, T3S, T3D);
Chris@82 775 T44 = FMA(KP831469612, T43, T40);
Chris@82 776 R1[WS(rs, 1)] = KP1_978353019 * (FMA(KP148335987, T44, T3T));
Chris@82 777 R1[WS(rs, 17)] = KP1_978353019 * (FNMS(KP148335987, T3T, T44));
Chris@82 778 T4d = FMA(KP831469612, T4b, T4a);
Chris@82 779 T4e = FMA(KP831469612, T48, T47);
Chris@82 780 R1[WS(rs, 13)] = KP1_940062506 * (FMA(KP250486960, T4e, T4d));
Chris@82 781 R1[WS(rs, 29)] = -(KP1_940062506 * (FNMS(KP250486960, T4d, T4e)));
Chris@82 782 }
Chris@82 783 {
Chris@82 784 E T45, T46, T49, T4c;
Chris@82 785 T45 = FNMS(KP831469612, T43, T40);
Chris@82 786 T46 = FNMS(KP831469612, T3S, T3D);
Chris@82 787 R1[WS(rs, 9)] = KP1_606415062 * (FMA(KP741650546, T46, T45));
Chris@82 788 R1[WS(rs, 25)] = -(KP1_606415062 * (FNMS(KP741650546, T45, T46)));
Chris@82 789 T49 = FNMS(KP831469612, T48, T47);
Chris@82 790 T4c = FNMS(KP831469612, T4b, T4a);
Chris@82 791 R1[WS(rs, 5)] = KP1_715457220 * (FMA(KP599376933, T4c, T49));
Chris@82 792 R1[WS(rs, 21)] = KP1_715457220 * (FNMS(KP599376933, T49, T4c));
Chris@82 793 }
Chris@82 794 }
Chris@82 795 }
Chris@82 796 }
Chris@82 797 }
Chris@82 798
/*
 * Descriptor handed to X(kr2c_register) together with r2cbIII_64.
 * The inner braces {238, 64, 196, 0} repeat the "238 additions,
 * 64 multiplications, 196 fused multiply/add" figures from the generator's
 * summary comment for this FMA variant.
 * NOTE(review): the leading 64 presumably is the transform size (-n 64),
 * and GENUS presumably comes from rdft/scalar/r2cbIII.h -- confirm.
 */
Chris@82 799 static const kr2c_desc desc = { 64, "r2cbIII_64", {238, 64, 196, 0}, &GENUS };
Chris@82 800
/*
 * Registration entry point: passes the r2cbIII_64 function and its
 * descriptor to X(kr2c_register) for planner p.
 */
Chris@82 801 void X(codelet_r2cbIII_64) (planner *p) {
Chris@82 802 X(kr2c_register) (p, r2cbIII_64, &desc);
Chris@82 803 }
Chris@82 804
Chris@82 805 #else
Chris@82 806
Chris@82 807 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 808
Chris@82 809 /*
Chris@82 810 * This function contains 434 FP additions, 208 FP multiplications,
Chris@82 811 * (or, 342 additions, 116 multiplications, 92 fused multiply/add),
Chris@82 812 * 130 stack variables, 39 constants, and 128 memory accesses
Chris@82 813 */
Chris@82 814 #include "rdft/scalar/r2cbIII.h"
Chris@82 815
/*
 * r2cbIII_64 -- non-FMA variant (this is the #else branch of the
 * ARCH_PREFERS_FMA / ISA_EXTENSION_PREFERS_FMA test at the top of the file).
 * Produced by gen_r2cb with "-n 64 -dft-III -sign 1"; machine-generated
 * straight-line code, not meant to be edited by hand.
 *
 * Parameters, as used by the body:
 *   R0, R1   - output arrays; each loop iteration stores elements 0..31 of
 *              both, addressed as R0[WS(rs, k)] / R1[WS(rs, k)].
 *              (presumably the even/odd-indexed real outputs -- TODO confirm)
 *   Cr, Ci   - input arrays; each loop iteration loads elements 0..31 of
 *              both, addressed as Cr[WS(csr, k)] / Ci[WS(csi, k)].
 *              (presumably real/imaginary parts of the input -- TODO confirm)
 *   rs       - stride argument used for every R0/R1 access.
 *   csr, csi - stride arguments used for Cr and Ci accesses respectively.
 *   v        - number of loop iterations performed.
 *   ivs, ovs - added to Cr/Ci (ivs) and R0/R1 (ovs) after every iteration.
 *
 * Per iteration: 64 loads and 64 stores, i.e. the 128 memory accesses quoted
 * in the generator's header comment.
 */
Chris@82 816 static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 817 {
/*
 * Numeric constants. The name encodes the value: KP<digits> is 0.<digits>,
 * KP1_<digits> is 1.<digits>, KP2_000000000 is 2.0.
 * NOTE(review): DK is a macro defined outside this file.
 */
Chris@82 818 DK(KP1_343117909, +1.343117909694036801250753700854843606457501264);
Chris@82 819 DK(KP1_481902250, +1.481902250709918182351233794990325459457910619);
Chris@82 820 DK(KP1_807978586, +1.807978586246886663172400594461074097420264050);
Chris@82 821 DK(KP855110186, +0.855110186860564188641933713777597068609157259);
Chris@82 822 DK(KP1_997590912, +1.997590912410344785429543209518201388886407229);
Chris@82 823 DK(KP098135348, +0.098135348654836028509909953885365316629490726);
Chris@82 824 DK(KP673779706, +0.673779706784440101378506425238295140955533559);
Chris@82 825 DK(KP1_883088130, +1.883088130366041556825018805199004714371179592);
Chris@82 826 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 827 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 828 DK(KP1_191398608, +1.191398608984866686934073057659939779023852677);
Chris@82 829 DK(KP1_606415062, +1.606415062961289819613353025926283847759138854);
Chris@82 830 DK(KP1_715457220, +1.715457220000544139804539968569540274084981599);
Chris@82 831 DK(KP1_028205488, +1.028205488386443453187387677937631545216098241);
Chris@82 832 DK(KP1_978353019, +1.978353019929561946903347476032486127967379067);
Chris@82 833 DK(KP293460948, +0.293460948910723503317700259293435639412430633);
Chris@82 834 DK(KP485960359, +0.485960359806527779896548324154942236641981567);
Chris@82 835 DK(KP1_940062506, +1.940062506389087985207968414572200502913731924);
Chris@82 836 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 837 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 838 DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
Chris@82 839 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@82 840 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@82 841 DK(KP942793473, +0.942793473651995297112775251810508755314920638);
Chris@82 842 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@82 843 DK(KP196034280, +0.196034280659121203988391127777283691722273346);
Chris@82 844 DK(KP580569354, +0.580569354508924735272384751634790549382952557);
Chris@82 845 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@82 846 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@82 847 DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
Chris@82 848 DK(KP390180644, +0.390180644032256535696569736954044481855383236);
Chris@82 849 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@82 850 DK(KP765366864, +0.765366864730179543456919968060797733522689125);
Chris@82 851 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@82 852 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@82 853 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 854 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 855 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 856 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 857 {
Chris@82 858 INT i;
/*
 * One pass of the loop body per transform: counts i down from v, moving the
 * output pointers by ovs and the input pointers by ivs each time.
 * NOTE(review): MAKE_VOLATILE_STRIDE is defined outside this file; its
 * effect on rs/csr/csi cannot be determined from here.
 */
Chris@82 859 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* Intermediates produced by the four load stages and consumed by the output stages. */
Chris@82 860 E T15, T3t, T3U, T2N, Tf, T6b, T6u, T6R, T4L, T5J, T1g, T3V, T5q, T5U, T2I;
Chris@82 861 E T3u, Tu, T6v, T4V, T5s, T6e, T6Q, T1s, T2D, T1D, T2E, T3B, T3Y, T4Q, T5r;
Chris@82 862 E T3y, T3X, TK, T6g, T57, T5N, T6j, T6N, T1W, T34, T25, T35, T3J, T4j, T52;
Chris@82 863 E T5M, T3G, T4i, TZ, T6l, T5i, T5Q, T6o, T6M, T2n, T37, T2w, T38, T3Q, T4m;
Chris@82 864 E T5d, T5P, T3N, T4l;
/* Load stage 1: Cr/Ci at indices 0, 31, 16, 15, 8, 23, 7, 24, combined into sums and differences. */
Chris@82 865 {
Chris@82 866 E T3, T11, T2M, T5n, T6, T2J, T14, T5m, Ta, T16, T19, T4J, Td, T1b, T1e;
Chris@82 867 E T4I;
Chris@82 868 {
Chris@82 869 E T1, T2, T2K, T2L;
Chris@82 870 T1 = Cr[0];
Chris@82 871 T2 = Cr[WS(csr, 31)];
Chris@82 872 T3 = T1 + T2;
Chris@82 873 T11 = T1 - T2;
Chris@82 874 T2K = Ci[0];
Chris@82 875 T2L = Ci[WS(csi, 31)];
Chris@82 876 T2M = T2K + T2L;
Chris@82 877 T5n = T2L - T2K;
Chris@82 878 }
Chris@82 879 {
Chris@82 880 E T4, T5, T12, T13;
Chris@82 881 T4 = Cr[WS(csr, 16)];
Chris@82 882 T5 = Cr[WS(csr, 15)];
Chris@82 883 T6 = T4 + T5;
Chris@82 884 T2J = T4 - T5;
Chris@82 885 T12 = Ci[WS(csi, 16)];
Chris@82 886 T13 = Ci[WS(csi, 15)];
Chris@82 887 T14 = T12 + T13;
Chris@82 888 T5m = T12 - T13;
Chris@82 889 }
Chris@82 890 {
Chris@82 891 E T8, T9, T17, T18;
Chris@82 892 T8 = Cr[WS(csr, 8)];
Chris@82 893 T9 = Cr[WS(csr, 23)];
Chris@82 894 Ta = T8 + T9;
Chris@82 895 T16 = T8 - T9;
Chris@82 896 T17 = Ci[WS(csi, 8)];
Chris@82 897 T18 = Ci[WS(csi, 23)];
Chris@82 898 T19 = T17 + T18;
Chris@82 899 T4J = T17 - T18;
Chris@82 900 }
Chris@82 901 {
Chris@82 902 E Tb, Tc, T1c, T1d;
Chris@82 903 Tb = Cr[WS(csr, 7)];
Chris@82 904 Tc = Cr[WS(csr, 24)];
Chris@82 905 Td = Tb + Tc;
Chris@82 906 T1b = Tb - Tc;
Chris@82 907 T1c = Ci[WS(csi, 7)];
Chris@82 908 T1d = Ci[WS(csi, 24)];
Chris@82 909 T1e = T1c + T1d;
Chris@82 910 T4I = T1d - T1c;
Chris@82 911 }
Chris@82 912 {
Chris@82 913 E T7, Te, T1a, T1f;
Chris@82 914 T15 = T11 - T14;
Chris@82 915 T3t = T11 + T14;
Chris@82 916 T3U = T2J - T2M;
Chris@82 917 T2N = T2J + T2M;
Chris@82 918 T7 = T3 + T6;
Chris@82 919 Te = Ta + Td;
Chris@82 920 Tf = T7 + Te;
Chris@82 921 T6b = T7 - Te;
Chris@82 922 {
Chris@82 923 E T6s, T6t, T4H, T4K;
Chris@82 924 T6s = T4J + T4I;
Chris@82 925 T6t = T5n - T5m;
Chris@82 926 T6u = T6s + T6t;
Chris@82 927 T6R = T6t - T6s;
Chris@82 928 T4H = T3 - T6;
Chris@82 929 T4K = T4I - T4J;
Chris@82 930 T4L = T4H + T4K;
Chris@82 931 T5J = T4H - T4K;
Chris@82 932 }
Chris@82 933 T1a = T16 - T19;
Chris@82 934 T1f = T1b - T1e;
Chris@82 935 T1g = KP707106781 * (T1a + T1f);
Chris@82 936 T3V = KP707106781 * (T1a - T1f);
Chris@82 937 {
Chris@82 938 E T5o, T5p, T2G, T2H;
Chris@82 939 T5o = T5m + T5n;
Chris@82 940 T5p = Ta - Td;
Chris@82 941 T5q = T5o - T5p;
Chris@82 942 T5U = T5p + T5o;
Chris@82 943 T2G = T16 + T19;
Chris@82 944 T2H = T1b + T1e;
Chris@82 945 T2I = KP707106781 * (T2G - T2H);
Chris@82 946 T3u = KP707106781 * (T2G + T2H);
Chris@82 947 }
Chris@82 948 }
Chris@82 949 }
/* Load stage 2: Cr/Ci at indices 4, 27, 20, 11, 3, 28, 12, 19. */
Chris@82 950 {
Chris@82 951 E Ti, T1i, T1q, T4N, Tl, T1n, T1l, T4O, Tp, T1t, T1B, T4S, Ts, T1y, T1w;
Chris@82 952 E T4T;
Chris@82 953 {
Chris@82 954 E Tg, Th, T1o, T1p;
Chris@82 955 Tg = Cr[WS(csr, 4)];
Chris@82 956 Th = Cr[WS(csr, 27)];
Chris@82 957 Ti = Tg + Th;
Chris@82 958 T1i = Tg - Th;
Chris@82 959 T1o = Ci[WS(csi, 4)];
Chris@82 960 T1p = Ci[WS(csi, 27)];
Chris@82 961 T1q = T1o + T1p;
Chris@82 962 T4N = T1o - T1p;
Chris@82 963 }
Chris@82 964 {
Chris@82 965 E Tj, Tk, T1j, T1k;
Chris@82 966 Tj = Cr[WS(csr, 20)];
Chris@82 967 Tk = Cr[WS(csr, 11)];
Chris@82 968 Tl = Tj + Tk;
Chris@82 969 T1n = Tj - Tk;
Chris@82 970 T1j = Ci[WS(csi, 20)];
Chris@82 971 T1k = Ci[WS(csi, 11)];
Chris@82 972 T1l = T1j + T1k;
Chris@82 973 T4O = T1j - T1k;
Chris@82 974 }
Chris@82 975 {
Chris@82 976 E Tn, To, T1z, T1A;
Chris@82 977 Tn = Cr[WS(csr, 3)];
Chris@82 978 To = Cr[WS(csr, 28)];
Chris@82 979 Tp = Tn + To;
Chris@82 980 T1t = Tn - To;
Chris@82 981 T1z = Ci[WS(csi, 3)];
Chris@82 982 T1A = Ci[WS(csi, 28)];
Chris@82 983 T1B = T1z + T1A;
Chris@82 984 T4S = T1A - T1z;
Chris@82 985 }
Chris@82 986 {
Chris@82 987 E Tq, Tr, T1u, T1v;
Chris@82 988 Tq = Cr[WS(csr, 12)];
Chris@82 989 Tr = Cr[WS(csr, 19)];
Chris@82 990 Ts = Tq + Tr;
Chris@82 991 T1y = Tq - Tr;
Chris@82 992 T1u = Ci[WS(csi, 12)];
Chris@82 993 T1v = Ci[WS(csi, 19)];
Chris@82 994 T1w = T1u + T1v;
Chris@82 995 T4T = T1u - T1v;
Chris@82 996 }
Chris@82 997 {
Chris@82 998 E Tm, Tt, T4R, T4U;
Chris@82 999 Tm = Ti + Tl;
Chris@82 1000 Tt = Tp + Ts;
Chris@82 1001 Tu = Tm + Tt;
Chris@82 1002 T6v = Tm - Tt;
Chris@82 1003 T4R = Tp - Ts;
Chris@82 1004 T4U = T4S - T4T;
Chris@82 1005 T4V = T4R + T4U;
Chris@82 1006 T5s = T4U - T4R;
Chris@82 1007 }
Chris@82 1008 {
Chris@82 1009 E T6c, T6d, T1m, T1r;
Chris@82 1010 T6c = T4T + T4S;
Chris@82 1011 T6d = T4O + T4N;
Chris@82 1012 T6e = T6c - T6d;
Chris@82 1013 T6Q = T6d + T6c;
Chris@82 1014 T1m = T1i - T1l;
Chris@82 1015 T1r = T1n + T1q;
Chris@82 1016 T1s = FNMS(KP382683432, T1r, KP923879532 * T1m);
Chris@82 1017 T2D = FMA(KP382683432, T1m, KP923879532 * T1r);
Chris@82 1018 }
Chris@82 1019 {
Chris@82 1020 E T1x, T1C, T3z, T3A;
Chris@82 1021 T1x = T1t - T1w;
Chris@82 1022 T1C = T1y - T1B;
Chris@82 1023 T1D = FMA(KP923879532, T1x, KP382683432 * T1C);
Chris@82 1024 T2E = FNMS(KP382683432, T1x, KP923879532 * T1C);
Chris@82 1025 T3z = T1t + T1w;
Chris@82 1026 T3A = T1y + T1B;
Chris@82 1027 T3B = FNMS(KP923879532, T3A, KP382683432 * T3z);
Chris@82 1028 T3Y = FMA(KP923879532, T3z, KP382683432 * T3A);
Chris@82 1029 }
Chris@82 1030 {
Chris@82 1031 E T4M, T4P, T3w, T3x;
Chris@82 1032 T4M = Ti - Tl;
Chris@82 1033 T4P = T4N - T4O;
Chris@82 1034 T4Q = T4M - T4P;
Chris@82 1035 T5r = T4M + T4P;
Chris@82 1036 T3w = T1i + T1l;
Chris@82 1037 T3x = T1q - T1n;
Chris@82 1038 T3y = FNMS(KP923879532, T3x, KP382683432 * T3w);
Chris@82 1039 T3X = FMA(KP923879532, T3w, KP382683432 * T3x);
Chris@82 1040 }
Chris@82 1041 }
/* Load stage 3: Cr/Ci at indices 2, 29, 18, 13, 5, 26, 10, 21. */
Chris@82 1042 {
Chris@82 1043 E Ty, T1G, T23, T54, TB, T20, T1J, T55, TI, T4Z, T1U, T1Y, TF, T50, T1P;
Chris@82 1044 E T1X;
Chris@82 1045 {
Chris@82 1046 E Tw, Tx, T1H, T1I;
Chris@82 1047 Tw = Cr[WS(csr, 2)];
Chris@82 1048 Tx = Cr[WS(csr, 29)];
Chris@82 1049 Ty = Tw + Tx;
Chris@82 1050 T1G = Tw - Tx;
Chris@82 1051 {
Chris@82 1052 E T21, T22, Tz, TA;
Chris@82 1053 T21 = Ci[WS(csi, 2)];
Chris@82 1054 T22 = Ci[WS(csi, 29)];
Chris@82 1055 T23 = T21 + T22;
Chris@82 1056 T54 = T21 - T22;
Chris@82 1057 Tz = Cr[WS(csr, 18)];
Chris@82 1058 TA = Cr[WS(csr, 13)];
Chris@82 1059 TB = Tz + TA;
Chris@82 1060 T20 = Tz - TA;
Chris@82 1061 }
Chris@82 1062 T1H = Ci[WS(csi, 18)];
Chris@82 1063 T1I = Ci[WS(csi, 13)];
Chris@82 1064 T1J = T1H + T1I;
Chris@82 1065 T55 = T1H - T1I;
Chris@82 1066 {
Chris@82 1067 E TG, TH, T1Q, T1R, T1S, T1T;
Chris@82 1068 TG = Cr[WS(csr, 5)];
Chris@82 1069 TH = Cr[WS(csr, 26)];
Chris@82 1070 T1Q = TG - TH;
Chris@82 1071 T1R = Ci[WS(csi, 5)];
Chris@82 1072 T1S = Ci[WS(csi, 26)];
Chris@82 1073 T1T = T1R + T1S;
Chris@82 1074 TI = TG + TH;
Chris@82 1075 T4Z = T1S - T1R;
Chris@82 1076 T1U = T1Q - T1T;
Chris@82 1077 T1Y = T1Q + T1T;
Chris@82 1078 }
Chris@82 1079 {
Chris@82 1080 E TD, TE, T1L, T1M, T1N, T1O;
Chris@82 1081 TD = Cr[WS(csr, 10)];
Chris@82 1082 TE = Cr[WS(csr, 21)];
Chris@82 1083 T1L = TD - TE;
Chris@82 1084 T1M = Ci[WS(csi, 10)];
Chris@82 1085 T1N = Ci[WS(csi, 21)];
Chris@82 1086 T1O = T1M + T1N;
Chris@82 1087 TF = TD + TE;
Chris@82 1088 T50 = T1M - T1N;
Chris@82 1089 T1P = T1L - T1O;
Chris@82 1090 T1X = T1L + T1O;
Chris@82 1091 }
Chris@82 1092 }
Chris@82 1093 {
Chris@82 1094 E TC, TJ, T53, T56;
Chris@82 1095 TC = Ty + TB;
Chris@82 1096 TJ = TF + TI;
Chris@82 1097 TK = TC + TJ;
Chris@82 1098 T6g = TC - TJ;
Chris@82 1099 T53 = TF - TI;
Chris@82 1100 T56 = T54 - T55;
Chris@82 1101 T57 = T53 + T56;
Chris@82 1102 T5N = T56 - T53;
Chris@82 1103 }
Chris@82 1104 {
Chris@82 1105 E T6h, T6i, T1K, T1V;
Chris@82 1106 T6h = T55 + T54;
Chris@82 1107 T6i = T50 + T4Z;
Chris@82 1108 T6j = T6h - T6i;
Chris@82 1109 T6N = T6i + T6h;
Chris@82 1110 T1K = T1G - T1J;
Chris@82 1111 T1V = KP707106781 * (T1P + T1U);
Chris@82 1112 T1W = T1K + T1V;
Chris@82 1113 T34 = T1K - T1V;
Chris@82 1114 }
Chris@82 1115 {
Chris@82 1116 E T1Z, T24, T3H, T3I;
Chris@82 1117 T1Z = KP707106781 * (T1X - T1Y);
Chris@82 1118 T24 = T20 + T23;
Chris@82 1119 T25 = T1Z + T24;
Chris@82 1120 T35 = T24 - T1Z;
Chris@82 1121 T3H = KP707106781 * (T1P - T1U);
Chris@82 1122 T3I = T23 - T20;
Chris@82 1123 T3J = T3H + T3I;
Chris@82 1124 T4j = T3I - T3H;
Chris@82 1125 }
Chris@82 1126 {
Chris@82 1127 E T4Y, T51, T3E, T3F;
Chris@82 1128 T4Y = Ty - TB;
Chris@82 1129 T51 = T4Z - T50;
Chris@82 1130 T52 = T4Y + T51;
Chris@82 1131 T5M = T4Y - T51;
Chris@82 1132 T3E = T1G + T1J;
Chris@82 1133 T3F = KP707106781 * (T1X + T1Y);
Chris@82 1134 T3G = T3E - T3F;
Chris@82 1135 T4i = T3E + T3F;
Chris@82 1136 }
Chris@82 1137 }
/* Load stage 4: Cr/Ci at indices 1, 30, 14, 17, 9, 22, 6, 25. */
Chris@82 1138 {
Chris@82 1139 E TN, T27, T2u, T5f, TQ, T2r, T2a, T5g, TX, T5a, T2l, T2p, TU, T5b, T2g;
Chris@82 1140 E T2o;
Chris@82 1141 {
Chris@82 1142 E TL, TM, T28, T29;
Chris@82 1143 TL = Cr[WS(csr, 1)];
Chris@82 1144 TM = Cr[WS(csr, 30)];
Chris@82 1145 TN = TL + TM;
Chris@82 1146 T27 = TL - TM;
Chris@82 1147 {
Chris@82 1148 E T2s, T2t, TO, TP;
Chris@82 1149 T2s = Ci[WS(csi, 1)];
Chris@82 1150 T2t = Ci[WS(csi, 30)];
Chris@82 1151 T2u = T2s + T2t;
Chris@82 1152 T5f = T2t - T2s;
Chris@82 1153 TO = Cr[WS(csr, 14)];
Chris@82 1154 TP = Cr[WS(csr, 17)];
Chris@82 1155 TQ = TO + TP;
Chris@82 1156 T2r = TO - TP;
Chris@82 1157 }
Chris@82 1158 T28 = Ci[WS(csi, 14)];
Chris@82 1159 T29 = Ci[WS(csi, 17)];
Chris@82 1160 T2a = T28 + T29;
Chris@82 1161 T5g = T28 - T29;
Chris@82 1162 {
Chris@82 1163 E TV, TW, T2h, T2i, T2j, T2k;
Chris@82 1164 TV = Cr[WS(csr, 9)];
Chris@82 1165 TW = Cr[WS(csr, 22)];
Chris@82 1166 T2h = TV - TW;
Chris@82 1167 T2i = Ci[WS(csi, 9)];
Chris@82 1168 T2j = Ci[WS(csi, 22)];
Chris@82 1169 T2k = T2i + T2j;
Chris@82 1170 TX = TV + TW;
Chris@82 1171 T5a = T2j - T2i;
Chris@82 1172 T2l = T2h - T2k;
Chris@82 1173 T2p = T2h + T2k;
Chris@82 1174 }
Chris@82 1175 {
Chris@82 1176 E TS, TT, T2c, T2d, T2e, T2f;
Chris@82 1177 TS = Cr[WS(csr, 6)];
Chris@82 1178 TT = Cr[WS(csr, 25)];
Chris@82 1179 T2c = TS - TT;
Chris@82 1180 T2d = Ci[WS(csi, 6)];
Chris@82 1181 T2e = Ci[WS(csi, 25)];
Chris@82 1182 T2f = T2d + T2e;
Chris@82 1183 TU = TS + TT;
Chris@82 1184 T5b = T2d - T2e;
Chris@82 1185 T2g = T2c - T2f;
Chris@82 1186 T2o = T2c + T2f;
Chris@82 1187 }
Chris@82 1188 }
Chris@82 1189 {
Chris@82 1190 E TR, TY, T5e, T5h;
Chris@82 1191 TR = TN + TQ;
Chris@82 1192 TY = TU + TX;
Chris@82 1193 TZ = TR + TY;
Chris@82 1194 T6l = TR - TY;
Chris@82 1195 T5e = TU - TX;
Chris@82 1196 T5h = T5f - T5g;
Chris@82 1197 T5i = T5e + T5h;
Chris@82 1198 T5Q = T5h - T5e;
Chris@82 1199 }
Chris@82 1200 {
Chris@82 1201 E T6m, T6n, T2b, T2m;
Chris@82 1202 T6m = T5g + T5f;
Chris@82 1203 T6n = T5b + T5a;
Chris@82 1204 T6o = T6m - T6n;
Chris@82 1205 T6M = T6n + T6m;
Chris@82 1206 T2b = T27 - T2a;
Chris@82 1207 T2m = KP707106781 * (T2g + T2l);
Chris@82 1208 T2n = T2b + T2m;
Chris@82 1209 T37 = T2b - T2m;
Chris@82 1210 }
Chris@82 1211 {
Chris@82 1212 E T2q, T2v, T3O, T3P;
Chris@82 1213 T2q = KP707106781 * (T2o - T2p);
Chris@82 1214 T2v = T2r - T2u;
Chris@82 1215 T2w = T2q + T2v;
Chris@82 1216 T38 = T2v - T2q;
Chris@82 1217 T3O = KP707106781 * (T2g - T2l);
Chris@82 1218 T3P = T2r + T2u;
Chris@82 1219 T3Q = T3O - T3P;
Chris@82 1220 T4m = T3O + T3P;
Chris@82 1221 }
Chris@82 1222 {
Chris@82 1223 E T59, T5c, T3L, T3M;
Chris@82 1224 T59 = TN - TQ;
Chris@82 1225 T5c = T5a - T5b;
Chris@82 1226 T5d = T59 + T5c;
Chris@82 1227 T5P = T59 - T5c;
Chris@82 1228 T3L = T27 + T2a;
Chris@82 1229 T3M = KP707106781 * (T2o + T2p);
Chris@82 1230 T3N = T3L - T3M;
Chris@82 1231 T4l = T3L + T3M;
Chris@82 1232 }
Chris@82 1233 }
/* Output stage: stores R0[0], R0[16], R0[8], R0[24]. */
Chris@82 1234 {
Chris@82 1235 E Tv, T10, T6X, T6Y, T6Z, T70;
Chris@82 1236 Tv = Tf + Tu;
Chris@82 1237 T10 = TK + TZ;
Chris@82 1238 T6X = Tv - T10;
Chris@82 1239 T6Y = T6N + T6M;
Chris@82 1240 T6Z = T6R - T6Q;
Chris@82 1241 T70 = T6Y + T6Z;
Chris@82 1242 R0[0] = KP2_000000000 * (Tv + T10);
Chris@82 1243 R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y);
Chris@82 1244 R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70);
Chris@82 1245 R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X);
Chris@82 1246 }
/* Output stage: stores R0[4], R0[28], R0[20], R0[12]. */
Chris@82 1247 {
Chris@82 1248 E T6P, T6V, T6U, T6W;
Chris@82 1249 {
Chris@82 1250 E T6L, T6O, T6S, T6T;
Chris@82 1251 T6L = Tf - Tu;
Chris@82 1252 T6O = T6M - T6N;
Chris@82 1253 T6P = T6L + T6O;
Chris@82 1254 T6V = T6L - T6O;
Chris@82 1255 T6S = T6Q + T6R;
Chris@82 1256 T6T = TK - TZ;
Chris@82 1257 T6U = T6S - T6T;
Chris@82 1258 T6W = T6T + T6S;
Chris@82 1259 }
Chris@82 1260 R0[WS(rs, 4)] = FMA(KP1_847759065, T6P, KP765366864 * T6U);
Chris@82 1261 R0[WS(rs, 28)] = FNMS(KP1_847759065, T6V, KP765366864 * T6W);
Chris@82 1262 R0[WS(rs, 20)] = FNMS(KP765366864, T6P, KP1_847759065 * T6U);
Chris@82 1263 R0[WS(rs, 12)] = FMA(KP765366864, T6V, KP1_847759065 * T6W);
Chris@82 1264 }
/* Output stage: stores R0[2], R0[18], R0[14], R0[30], R0[10], R0[26], R0[6], R0[22]. */
Chris@82 1265 {
Chris@82 1266 E T6f, T6w, T6G, T6D, T6z, T6E, T6q, T6H;
Chris@82 1267 T6f = T6b + T6e;
Chris@82 1268 T6w = T6u - T6v;
Chris@82 1269 T6G = T6v + T6u;
Chris@82 1270 T6D = T6b - T6e;
Chris@82 1271 {
Chris@82 1272 E T6x, T6y, T6k, T6p;
Chris@82 1273 T6x = T6g + T6j;
Chris@82 1274 T6y = T6o - T6l;
Chris@82 1275 T6z = KP707106781 * (T6x + T6y);
Chris@82 1276 T6E = KP707106781 * (T6y - T6x);
Chris@82 1277 T6k = T6g - T6j;
Chris@82 1278 T6p = T6l + T6o;
Chris@82 1279 T6q = KP707106781 * (T6k + T6p);
Chris@82 1280 T6H = KP707106781 * (T6k - T6p);
Chris@82 1281 }
Chris@82 1282 {
Chris@82 1283 E T6r, T6A, T6J, T6K;
Chris@82 1284 T6r = T6f + T6q;
Chris@82 1285 T6A = T6w - T6z;
Chris@82 1286 R0[WS(rs, 2)] = FMA(KP1_961570560, T6r, KP390180644 * T6A);
Chris@82 1287 R0[WS(rs, 18)] = FNMS(KP390180644, T6r, KP1_961570560 * T6A);
Chris@82 1288 T6J = T6D - T6E;
Chris@82 1289 T6K = T6H + T6G;
Chris@82 1290 R0[WS(rs, 14)] = FMA(KP390180644, T6J, KP1_961570560 * T6K);
Chris@82 1291 R0[WS(rs, 30)] = FNMS(KP1_961570560, T6J, KP390180644 * T6K);
Chris@82 1292 }
Chris@82 1293 {
Chris@82 1294 E T6B, T6C, T6F, T6I;
Chris@82 1295 T6B = T6f - T6q;
Chris@82 1296 T6C = T6z + T6w;
Chris@82 1297 R0[WS(rs, 10)] = FMA(KP1_111140466, T6B, KP1_662939224 * T6C);
Chris@82 1298 R0[WS(rs, 26)] = FNMS(KP1_662939224, T6B, KP1_111140466 * T6C);
Chris@82 1299 T6F = T6D + T6E;
Chris@82 1300 T6I = T6G - T6H;
Chris@82 1301 R0[WS(rs, 6)] = FMA(KP1_662939224, T6F, KP1_111140466 * T6I);
Chris@82 1302 R0[WS(rs, 22)] = FNMS(KP1_111140466, T6F, KP1_662939224 * T6I);
Chris@82 1303 }
Chris@82 1304 }
/* Output stage: stores R0[3], R0[19], R0[15], R0[31], R0[11], R0[27], R0[7], R0[23]. */
Chris@82 1305 {
Chris@82 1306 E T5L, T63, T5W, T66, T5S, T67, T5Z, T64, T5K, T5V;
Chris@82 1307 T5K = KP707106781 * (T5s - T5r);
Chris@82 1308 T5L = T5J + T5K;
Chris@82 1309 T63 = T5J - T5K;
Chris@82 1310 T5V = KP707106781 * (T4Q - T4V);
Chris@82 1311 T5W = T5U - T5V;
Chris@82 1312 T66 = T5V + T5U;
Chris@82 1313 {
Chris@82 1314 E T5O, T5R, T5X, T5Y;
Chris@82 1315 T5O = FNMS(KP923879532, T5N, KP382683432 * T5M);
Chris@82 1316 T5R = FMA(KP382683432, T5P, KP923879532 * T5Q);
Chris@82 1317 T5S = T5O + T5R;
Chris@82 1318 T67 = T5O - T5R;
Chris@82 1319 T5X = FMA(KP923879532, T5M, KP382683432 * T5N);
Chris@82 1320 T5Y = FNMS(KP923879532, T5P, KP382683432 * T5Q);
Chris@82 1321 T5Z = T5X + T5Y;
Chris@82 1322 T64 = T5Y - T5X;
Chris@82 1323 }
Chris@82 1324 {
Chris@82 1325 E T5T, T60, T69, T6a;
Chris@82 1326 T5T = T5L + T5S;
Chris@82 1327 T60 = T5W - T5Z;
Chris@82 1328 R0[WS(rs, 3)] = FMA(KP1_913880671, T5T, KP580569354 * T60);
Chris@82 1329 R0[WS(rs, 19)] = FNMS(KP580569354, T5T, KP1_913880671 * T60);
Chris@82 1330 T69 = T63 - T64;
Chris@82 1331 T6a = T67 + T66;
Chris@82 1332 R0[WS(rs, 15)] = FMA(KP196034280, T69, KP1_990369453 * T6a);
Chris@82 1333 R0[WS(rs, 31)] = FNMS(KP1_990369453, T69, KP196034280 * T6a);
Chris@82 1334 }
Chris@82 1335 {
Chris@82 1336 E T61, T62, T65, T68;
Chris@82 1337 T61 = T5L - T5S;
Chris@82 1338 T62 = T5Z + T5W;
Chris@82 1339 R0[WS(rs, 11)] = FMA(KP942793473, T61, KP1_763842528 * T62);
Chris@82 1340 R0[WS(rs, 27)] = FNMS(KP1_763842528, T61, KP942793473 * T62);
Chris@82 1341 T65 = T63 + T64;
Chris@82 1342 T68 = T66 - T67;
Chris@82 1343 R0[WS(rs, 7)] = FMA(KP1_546020906, T65, KP1_268786568 * T68);
Chris@82 1344 R0[WS(rs, 23)] = FNMS(KP1_268786568, T65, KP1_546020906 * T68);
Chris@82 1345 }
Chris@82 1346 }
/* Output stage: stores R0[1], R0[17], R0[13], R0[29], R0[9], R0[25], R0[5], R0[21]. */
Chris@82 1347 {
Chris@82 1348 E T4X, T5B, T5u, T5E, T5k, T5F, T5x, T5C, T4W, T5t;
Chris@82 1349 T4W = KP707106781 * (T4Q + T4V);
Chris@82 1350 T4X = T4L + T4W;
Chris@82 1351 T5B = T4L - T4W;
Chris@82 1352 T5t = KP707106781 * (T5r + T5s);
Chris@82 1353 T5u = T5q - T5t;
Chris@82 1354 T5E = T5t + T5q;
Chris@82 1355 {
Chris@82 1356 E T58, T5j, T5v, T5w;
Chris@82 1357 T58 = FNMS(KP382683432, T57, KP923879532 * T52);
Chris@82 1358 T5j = FMA(KP923879532, T5d, KP382683432 * T5i);
Chris@82 1359 T5k = T58 + T5j;
Chris@82 1360 T5F = T58 - T5j;
Chris@82 1361 T5v = FMA(KP382683432, T52, KP923879532 * T57);
Chris@82 1362 T5w = FNMS(KP382683432, T5d, KP923879532 * T5i);
Chris@82 1363 T5x = T5v + T5w;
Chris@82 1364 T5C = T5w - T5v;
Chris@82 1365 }
Chris@82 1366 {
Chris@82 1367 E T5l, T5y, T5H, T5I;
Chris@82 1368 T5l = T4X + T5k;
Chris@82 1369 T5y = T5u - T5x;
Chris@82 1370 R0[WS(rs, 1)] = FMA(KP1_990369453, T5l, KP196034280 * T5y);
Chris@82 1371 R0[WS(rs, 17)] = FNMS(KP196034280, T5l, KP1_990369453 * T5y);
Chris@82 1372 T5H = T5B - T5C;
Chris@82 1373 T5I = T5F + T5E;
Chris@82 1374 R0[WS(rs, 13)] = FMA(KP580569354, T5H, KP1_913880671 * T5I);
Chris@82 1375 R0[WS(rs, 29)] = FNMS(KP1_913880671, T5H, KP580569354 * T5I);
Chris@82 1376 }
Chris@82 1377 {
Chris@82 1378 E T5z, T5A, T5D, T5G;
Chris@82 1379 T5z = T4X - T5k;
Chris@82 1380 T5A = T5x + T5u;
Chris@82 1381 R0[WS(rs, 9)] = FMA(KP1_268786568, T5z, KP1_546020906 * T5A);
Chris@82 1382 R0[WS(rs, 25)] = FNMS(KP1_546020906, T5z, KP1_268786568 * T5A);
Chris@82 1383 T5D = T5B + T5C;
Chris@82 1384 T5G = T5E - T5F;
Chris@82 1385 R0[WS(rs, 5)] = FMA(KP1_763842528, T5D, KP942793473 * T5G);
Chris@82 1386 R0[WS(rs, 21)] = FNMS(KP942793473, T5D, KP1_763842528 * T5G);
Chris@82 1387 }
Chris@82 1388 }
/* Output stage: stores R1[2], R1[18], R1[14], R1[30], R1[10], R1[26], R1[6], R1[22]. */
Chris@82 1389 {
Chris@82 1390 E T33, T3l, T3h, T3m, T3a, T3p, T3e, T3o;
Chris@82 1391 {
Chris@82 1392 E T31, T32, T3f, T3g;
Chris@82 1393 T31 = T15 - T1g;
Chris@82 1394 T32 = T2E - T2D;
Chris@82 1395 T33 = T31 + T32;
Chris@82 1396 T3l = T31 - T32;
Chris@82 1397 T3f = FMA(KP831469612, T34, KP555570233 * T35);
Chris@82 1398 T3g = FNMS(KP831469612, T37, KP555570233 * T38);
Chris@82 1399 T3h = T3f + T3g;
Chris@82 1400 T3m = T3g - T3f;
Chris@82 1401 }
Chris@82 1402 {
Chris@82 1403 E T36, T39, T3c, T3d;
Chris@82 1404 T36 = FNMS(KP831469612, T35, KP555570233 * T34);
Chris@82 1405 T39 = FMA(KP555570233, T37, KP831469612 * T38);
Chris@82 1406 T3a = T36 + T39;
Chris@82 1407 T3p = T36 - T39;
Chris@82 1408 T3c = T2I - T2N;
Chris@82 1409 T3d = T1s - T1D;
Chris@82 1410 T3e = T3c - T3d;
Chris@82 1411 T3o = T3d + T3c;
Chris@82 1412 }
Chris@82 1413 {
Chris@82 1414 E T3b, T3i, T3r, T3s;
Chris@82 1415 T3b = T33 + T3a;
Chris@82 1416 T3i = T3e - T3h;
Chris@82 1417 R1[WS(rs, 2)] = FMA(KP1_940062506, T3b, KP485960359 * T3i);
Chris@82 1418 R1[WS(rs, 18)] = FNMS(KP485960359, T3b, KP1_940062506 * T3i);
Chris@82 1419 T3r = T3l - T3m;
Chris@82 1420 T3s = T3p + T3o;
Chris@82 1421 R1[WS(rs, 14)] = FMA(KP293460948, T3r, KP1_978353019 * T3s);
Chris@82 1422 R1[WS(rs, 30)] = FNMS(KP1_978353019, T3r, KP293460948 * T3s);
Chris@82 1423 }
Chris@82 1424 {
Chris@82 1425 E T3j, T3k, T3n, T3q;
Chris@82 1426 T3j = T33 - T3a;
Chris@82 1427 T3k = T3h + T3e;
Chris@82 1428 R1[WS(rs, 10)] = FMA(KP1_028205488, T3j, KP1_715457220 * T3k);
Chris@82 1429 R1[WS(rs, 26)] = FNMS(KP1_715457220, T3j, KP1_028205488 * T3k);
Chris@82 1430 T3n = T3l + T3m;
Chris@82 1431 T3q = T3o - T3p;
Chris@82 1432 R1[WS(rs, 6)] = FMA(KP1_606415062, T3n, KP1_191398608 * T3q);
Chris@82 1433 R1[WS(rs, 22)] = FNMS(KP1_191398608, T3n, KP1_606415062 * T3q);
Chris@82 1434 }
Chris@82 1435 }
/* Output stage: stores R1[3], R1[19], R1[15], R1[31], R1[11], R1[27], R1[7], R1[23]. */
Chris@82 1436 {
Chris@82 1437 E T4h, T4z, T4v, T4A, T4o, T4D, T4s, T4C;
Chris@82 1438 {
Chris@82 1439 E T4f, T4g, T4t, T4u;
Chris@82 1440 T4f = T3t + T3u;
Chris@82 1441 T4g = T3X + T3Y;
Chris@82 1442 T4h = T4f - T4g;
Chris@82 1443 T4z = T4f + T4g;
Chris@82 1444 T4t = FMA(KP980785280, T4i, KP195090322 * T4j);
Chris@82 1445 T4u = FMA(KP980785280, T4l, KP195090322 * T4m);
Chris@82 1446 T4v = T4t - T4u;
Chris@82 1447 T4A = T4t + T4u;
Chris@82 1448 }
Chris@82 1449 {
Chris@82 1450 E T4k, T4n, T4q, T4r;
Chris@82 1451 T4k = FNMS(KP980785280, T4j, KP195090322 * T4i);
Chris@82 1452 T4n = FNMS(KP980785280, T4m, KP195090322 * T4l);
Chris@82 1453 T4o = T4k + T4n;
Chris@82 1454 T4D = T4k - T4n;
Chris@82 1455 T4q = T3V + T3U;
Chris@82 1456 T4r = T3y - T3B;
Chris@82 1457 T4s = T4q - T4r;
Chris@82 1458 T4C = T4r + T4q;
Chris@82 1459 }
Chris@82 1460 {
Chris@82 1461 E T4p, T4w, T4F, T4G;
Chris@82 1462 T4p = T4h + T4o;
Chris@82 1463 T4w = T4s - T4v;
Chris@82 1464 R1[WS(rs, 3)] = FMA(KP1_883088130, T4p, KP673779706 * T4w);
Chris@82 1465 R1[WS(rs, 19)] = FNMS(KP673779706, T4p, KP1_883088130 * T4w);
Chris@82 1466 T4F = T4z + T4A;
Chris@82 1467 T4G = T4D + T4C;
Chris@82 1468 R1[WS(rs, 15)] = FMA(KP098135348, T4F, KP1_997590912 * T4G);
Chris@82 1469 R1[WS(rs, 31)] = FNMS(KP1_997590912, T4F, KP098135348 * T4G);
Chris@82 1470 }
Chris@82 1471 {
Chris@82 1472 E T4x, T4y, T4B, T4E;
Chris@82 1473 T4x = T4h - T4o;
Chris@82 1474 T4y = T4v + T4s;
Chris@82 1475 R1[WS(rs, 11)] = FMA(KP855110186, T4x, KP1_807978586 * T4y);
Chris@82 1476 R1[WS(rs, 27)] = FNMS(KP1_807978586, T4x, KP855110186 * T4y);
Chris@82 1477 T4B = T4z - T4A;
Chris@82 1478 T4E = T4C - T4D;
Chris@82 1479 R1[WS(rs, 7)] = FMA(KP1_481902250, T4B, KP1_343117909 * T4E);
Chris@82 1480 R1[WS(rs, 23)] = FNMS(KP1_343117909, T4B, KP1_481902250 * T4E);
Chris@82 1481 }
Chris@82 1482 }
/* Output stage: stores R1[0], R1[16], R1[12], R1[28], R1[8], R1[24], R1[4], R1[20]. */
Chris@82 1483 {
Chris@82 1484 E T1F, T2T, T2P, T2W, T2y, T2X, T2C, T2U;
Chris@82 1485 {
Chris@82 1486 E T1h, T1E, T2F, T2O;
Chris@82 1487 T1h = T15 + T1g;
Chris@82 1488 T1E = T1s + T1D;
Chris@82 1489 T1F = T1h + T1E;
Chris@82 1490 T2T = T1h - T1E;
Chris@82 1491 T2F = T2D + T2E;
Chris@82 1492 T2O = T2I + T2N;
Chris@82 1493 T2P = T2F + T2O;
Chris@82 1494 T2W = T2F - T2O;
Chris@82 1495 }
Chris@82 1496 {
Chris@82 1497 E T26, T2x, T2A, T2B;
Chris@82 1498 T26 = FNMS(KP195090322, T25, KP980785280 * T1W);
Chris@82 1499 T2x = FMA(KP980785280, T2n, KP195090322 * T2w);
Chris@82 1500 T2y = T26 + T2x;
Chris@82 1501 T2X = T26 - T2x;
Chris@82 1502 T2A = FMA(KP195090322, T1W, KP980785280 * T25);
Chris@82 1503 T2B = FNMS(KP195090322, T2n, KP980785280 * T2w);
Chris@82 1504 T2C = T2A + T2B;
Chris@82 1505 T2U = T2B - T2A;
Chris@82 1506 }
Chris@82 1507 {
Chris@82 1508 E T2z, T2Q, T2Z, T30;
Chris@82 1509 T2z = T1F + T2y;
Chris@82 1510 T2Q = T2C + T2P;
Chris@82 1511 R1[0] = FNMS(KP098135348, T2Q, KP1_997590912 * T2z);
Chris@82 1512 R1[WS(rs, 16)] = -(FMA(KP098135348, T2z, KP1_997590912 * T2Q));
Chris@82 1513 T2Z = T2T - T2U;
Chris@82 1514 T30 = T2X + T2W;
Chris@82 1515 R1[WS(rs, 12)] = FMA(KP673779706, T2Z, KP1_883088130 * T30);
Chris@82 1516 R1[WS(rs, 28)] = FNMS(KP1_883088130, T2Z, KP673779706 * T30);
Chris@82 1517 }
Chris@82 1518 {
Chris@82 1519 E T2R, T2S, T2V, T2Y;
Chris@82 1520 T2R = T1F - T2y;
Chris@82 1521 T2S = T2C - T2P;
Chris@82 1522 R1[WS(rs, 8)] = FMA(KP1_343117909, T2R, KP1_481902250 * T2S);
Chris@82 1523 R1[WS(rs, 24)] = FNMS(KP1_481902250, T2R, KP1_343117909 * T2S);
Chris@82 1524 T2V = T2T + T2U;
Chris@82 1525 T2Y = T2W - T2X;
Chris@82 1526 R1[WS(rs, 4)] = FMA(KP1_807978586, T2V, KP855110186 * T2Y);
Chris@82 1527 R1[WS(rs, 20)] = FNMS(KP855110186, T2V, KP1_807978586 * T2Y);
Chris@82 1528 }
Chris@82 1529 }
/* Output stage: stores R1[1], R1[17], R1[13], R1[29], R1[9], R1[25], R1[5], R1[21]. */
Chris@82 1530 {
Chris@82 1531 E T3D, T47, T43, T48, T3S, T4b, T40, T4a;
Chris@82 1532 {
Chris@82 1533 E T3v, T3C, T41, T42;
Chris@82 1534 T3v = T3t - T3u;
Chris@82 1535 T3C = T3y + T3B;
Chris@82 1536 T3D = T3v + T3C;
Chris@82 1537 T47 = T3v - T3C;
Chris@82 1538 T41 = FMA(KP555570233, T3G, KP831469612 * T3J);
Chris@82 1539 T42 = FNMS(KP555570233, T3N, KP831469612 * T3Q);
Chris@82 1540 T43 = T41 + T42;
Chris@82 1541 T48 = T42 - T41;
Chris@82 1542 }
Chris@82 1543 {
Chris@82 1544 E T3K, T3R, T3W, T3Z;
Chris@82 1545 T3K = FNMS(KP555570233, T3J, KP831469612 * T3G);
Chris@82 1546 T3R = FMA(KP831469612, T3N, KP555570233 * T3Q);
Chris@82 1547 T3S = T3K + T3R;
Chris@82 1548 T4b = T3K - T3R;
Chris@82 1549 T3W = T3U - T3V;
Chris@82 1550 T3Z = T3X - T3Y;
Chris@82 1551 T40 = T3W - T3Z;
Chris@82 1552 T4a = T3Z + T3W;
Chris@82 1553 }
Chris@82 1554 {
Chris@82 1555 E T3T, T44, T4d, T4e;
Chris@82 1556 T3T = T3D + T3S;
Chris@82 1557 T44 = T40 - T43;
Chris@82 1558 R1[WS(rs, 1)] = FMA(KP1_978353019, T3T, KP293460948 * T44);
Chris@82 1559 R1[WS(rs, 17)] = FNMS(KP293460948, T3T, KP1_978353019 * T44);
Chris@82 1560 T4d = T47 - T48;
Chris@82 1561 T4e = T4b + T4a;
Chris@82 1562 R1[WS(rs, 13)] = FMA(KP485960359, T4d, KP1_940062506 * T4e);
Chris@82 1563 R1[WS(rs, 29)] = FNMS(KP1_940062506, T4d, KP485960359 * T4e);
Chris@82 1564 }
Chris@82 1565 {
Chris@82 1566 E T45, T46, T49, T4c;
Chris@82 1567 T45 = T3D - T3S;
Chris@82 1568 T46 = T43 + T40;
Chris@82 1569 R1[WS(rs, 9)] = FMA(KP1_191398608, T45, KP1_606415062 * T46);
Chris@82 1570 R1[WS(rs, 25)] = FNMS(KP1_606415062, T45, KP1_191398608 * T46);
Chris@82 1571 T49 = T47 + T48;
Chris@82 1572 T4c = T4a - T4b;
Chris@82 1573 R1[WS(rs, 5)] = FMA(KP1_715457220, T49, KP1_028205488 * T4c);
Chris@82 1574 R1[WS(rs, 21)] = FNMS(KP1_028205488, T49, KP1_715457220 * T4c);
Chris@82 1575 }
Chris@82 1576 }
Chris@82 1577 }
Chris@82 1578 }
Chris@82 1579 }
Chris@82 1580
/*
 * Descriptor for the non-FMA variant of the codelet: 64 (the -n value given
 * to the generator), the codelet name string, and an operation-count record.
 * The first three counts (342, 116, 92) match the additions / multiplications /
 * fused multiply-adds reported in the generator's header comment for this
 * variant; the meaning of the fourth field (0) is not visible in this file.
 * NOTE(review): GENUS is not defined here -- presumably supplied by the
 * included rdft/scalar/r2cbIII.h; confirm.
 */
Chris@82 1581 static const kr2c_desc desc = { 64, "r2cbIII_64", {342, 116, 92, 0}, &GENUS };
Chris@82 1582
/*
 * Registration entry point (non-FMA variant): hands the planner `p`, the
 * kernel function r2cbIII_64 and its descriptor `desc` to X(kr2c_register).
 * NOTE(review): X() is a macro defined outside this file -- presumably a
 * symbol-name prefixing macro; confirm.
 */
Chris@82 1583 void X(codelet_r2cbIII_64) (planner *p) {
Chris@82 1584 X(kr2c_register) (p, r2cbIII_64, &desc);
Chris@82 1585 }
Chris@82 1586
Chris@82 1587 #endif