annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cfII_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:44 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cfII_64 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 434 FP additions, 320 FP multiplications,
Chris@82 32 * (or, 114 additions, 0 multiplications, 320 fused multiply/add),
Chris@82 33 * 118 stack variables, 31 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cfII.h"
Chris@82 36
/*
 * r2cfII_64 (FMA-preferring variant): generated straight-line kernel; see the
 * generator command line in the comment preceding this function
 * (-fma -n 64 -name r2cfII_64 -dft-II). This variant is compiled only when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * On each loop pass it reads R0[WS(rs, k)] and R1[WS(rs, k)] for every k in
 * 0..31 (64 loads) and stores Cr[WS(csr, k)] and Ci[WS(csi, k)] for every k
 * in 0..31 (64 stores). Nothing is returned; when v <= 0 the loop body never
 * runs and nothing is read or written.
 *
 * Parameters:
 *   R0, R1        input arrays, indexed through stride rs
 *   Cr, Ci        output arrays, indexed through strides csr and csi
 *   rs, csr, csi  strides handed to WS() when forming each index
 *   v             number of loop passes
 *   ivs, ovs      amounts added to the input (R0, R1) and output (Cr, Ci)
 *                 pointers after each pass
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE come from headers not shown here. Presumably
 * FMA(a, b, c) = a*b + c, FMS(a, b, c) = a*b - c and FNMS(a, b, c) = c - a*b,
 * as in FFTW's codelet headers -- confirm there.
 * NOTE(review): R0/R1 presumably carry the even/odd-indexed real samples and
 * Cr/Ci the real/imaginary output parts -- confirm against the r2c codelet
 * interface.
 */
Chris@82 37 static void r2cfII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants; each KPddddddddd name spells the first nine fractional digits of its value. */
Chris@82 39 DK(KP941544065, +0.941544065183020778412509402599502357185589796);
Chris@82 40 DK(KP903989293, +0.903989293123443331586200297230537048710132025);
Chris@82 41 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@82 42 DK(KP472964775, +0.472964775891319928124438237972992463904131113);
Chris@82 43 DK(KP357805721, +0.357805721314524104672487743774474392487532769);
Chris@82 44 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@82 45 DK(KP989176509, +0.989176509964780973451673738016243063983689533);
Chris@82 46 DK(KP803207531, +0.803207531480644909806676512963141923879569427);
Chris@82 47 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@82 48 DK(KP741650546, +0.741650546272035369581266691172079863842265220);
Chris@82 49 DK(KP148335987, +0.148335987538347428753676511486911367000625355);
Chris@82 50 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@82 51 DK(KP998795456, +0.998795456205172392714771604759100694443203615);
Chris@82 52 DK(KP740951125, +0.740951125354959091175616897495162729728955309);
Chris@82 53 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@82 54 DK(KP906347169, +0.906347169019147157946142717268914412664134293);
Chris@82 55 DK(KP049126849, +0.049126849769467254105343321271313617079695752);
Chris@82 56 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@82 57 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 58 DK(KP970031253, +0.970031253194543992603984207286100251456865962);
Chris@82 59 DK(KP857728610, +0.857728610000272069902269984284770137042490799);
Chris@82 60 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@82 61 DK(KP599376933, +0.599376933681923766271389869014404232837890546);
Chris@82 62 DK(KP250486960, +0.250486960191305461595702160124721208578685568);
Chris@82 63 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@82 64 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 65 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 66 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 67 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 68 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 69 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 70 {
Chris@82 71 INT i;
/* i counts down from v; after each pass R0/R1 advance by ivs and Cr/Ci by ovs. */
Chris@82 72 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* Intermediates assigned in the load stages below and consumed by the four output stages. */
Chris@82 73 E Tm, T35, T3Z, T5h, Tv, T34, T3W, T5g, Td, T33, T6z, T6N, T3T, T5f, T65;
Chris@82 74 E T6j, T2b, T3n, T4O, T5C, T2C, T3q, T4D, T5z, TK, T3b, T4e, T5l, TR, T3c;
Chris@82 75 E T4b, T5k, T15, T38, T47, T5o, T1c, T39, T44, T5n, T1s, T3g, T4v, T5v, T1T;
Chris@82 76 E T3j, T4k, T5s, T2u, T3r, T4R, T5A, T2F, T3o, T4K, T5D, T1L, T3k, T4y, T5t;
Chris@82 77 E T1W, T3h, T4r, T5w;
/* Load stage: R0[WS(rs, k)], k = 14, 30, 6, 22 -> Tm, T35, T3Z, T5h. */
Chris@82 78 {
Chris@82 79 E Te, Tj, Th, Tk, Tf, Tg;
Chris@82 80 Te = R0[WS(rs, 14)];
Chris@82 81 Tj = R0[WS(rs, 30)];
Chris@82 82 Tf = R0[WS(rs, 6)];
Chris@82 83 Tg = R0[WS(rs, 22)];
Chris@82 84 Th = Tf + Tg;
Chris@82 85 Tk = Tg - Tf;
Chris@82 86 {
Chris@82 87 E Ti, Tl, T3X, T3Y;
Chris@82 88 Ti = FNMS(KP707106781, Th, Te);
Chris@82 89 Tl = FNMS(KP707106781, Tk, Tj);
Chris@82 90 Tm = FNMS(KP668178637, Tl, Ti);
Chris@82 91 T35 = FMA(KP668178637, Ti, Tl);
Chris@82 92 T3X = FMA(KP707106781, Th, Te);
Chris@82 93 T3Y = FMA(KP707106781, Tk, Tj);
Chris@82 94 T3Z = FMA(KP198912367, T3Y, T3X);
Chris@82 95 T5h = FNMS(KP198912367, T3X, T3Y);
Chris@82 96 }
Chris@82 97 }
/* Load stage: R0[WS(rs, k)], k = 18, 2, 10, 26 -> Tv, T34, T3W, T5g. */
Chris@82 98 {
Chris@82 99 E Tn, Ts, Tq, Tt, To, Tp;
Chris@82 100 Tn = R0[WS(rs, 18)];
Chris@82 101 Ts = R0[WS(rs, 2)];
Chris@82 102 To = R0[WS(rs, 10)];
Chris@82 103 Tp = R0[WS(rs, 26)];
Chris@82 104 Tq = To + Tp;
Chris@82 105 Tt = To - Tp;
Chris@82 106 {
Chris@82 107 E Tr, Tu, T3U, T3V;
Chris@82 108 Tr = FNMS(KP707106781, Tq, Tn);
Chris@82 109 Tu = FNMS(KP707106781, Tt, Ts);
Chris@82 110 Tv = FNMS(KP668178637, Tu, Tr);
Chris@82 111 T34 = FMA(KP668178637, Tr, Tu);
Chris@82 112 T3U = FMA(KP707106781, Tq, Tn);
Chris@82 113 T3V = FMA(KP707106781, Tt, Ts);
Chris@82 114 T3W = FMA(KP198912367, T3V, T3U);
Chris@82 115 T5g = FNMS(KP198912367, T3U, T3V);
Chris@82 116 }
Chris@82 117 }
/* Load stage: R0[0] and R0[WS(rs, k)], k = 16, 8, 24, 20, 4, 12, 28 -> Td, T33, T6z, T6N, T3T, T5f, T65, T6j. */
Chris@82 118 {
Chris@82 119 E T1, T61, T4, T62, T8, T3Q, Tb, T3R, T2, T3;
Chris@82 120 T1 = R0[0];
Chris@82 121 T61 = R0[WS(rs, 16)];
Chris@82 122 T2 = R0[WS(rs, 8)];
Chris@82 123 T3 = R0[WS(rs, 24)];
Chris@82 124 T4 = T2 - T3;
Chris@82 125 T62 = T2 + T3;
Chris@82 126 {
Chris@82 127 E T6, T7, T9, Ta;
Chris@82 128 T6 = R0[WS(rs, 20)];
Chris@82 129 T7 = R0[WS(rs, 4)];
Chris@82 130 T8 = FMA(KP414213562, T7, T6);
Chris@82 131 T3Q = FNMS(KP414213562, T6, T7);
Chris@82 132 T9 = R0[WS(rs, 12)];
Chris@82 133 Ta = R0[WS(rs, 28)];
Chris@82 134 Tb = FMA(KP414213562, Ta, T9);
Chris@82 135 T3R = FMS(KP414213562, T9, Ta);
Chris@82 136 }
Chris@82 137 {
Chris@82 138 E T5, Tc, T6x, T6y;
Chris@82 139 T5 = FNMS(KP707106781, T4, T1);
Chris@82 140 Tc = T8 - Tb;
Chris@82 141 Td = FNMS(KP923879532, Tc, T5);
Chris@82 142 T33 = FMA(KP923879532, Tc, T5);
Chris@82 143 T6x = FNMS(KP707106781, T62, T61);
Chris@82 144 T6y = T3R - T3Q;
Chris@82 145 T6z = FMA(KP923879532, T6y, T6x);
Chris@82 146 T6N = FNMS(KP923879532, T6y, T6x);
Chris@82 147 }
Chris@82 148 {
Chris@82 149 E T3P, T3S, T63, T64;
Chris@82 150 T3P = FMA(KP707106781, T4, T1);
Chris@82 151 T3S = T3Q + T3R;
Chris@82 152 T3T = FNMS(KP923879532, T3S, T3P);
Chris@82 153 T5f = FMA(KP923879532, T3S, T3P);
Chris@82 154 T63 = FMA(KP707106781, T62, T61);
Chris@82 155 T64 = T8 + Tb;
Chris@82 156 T65 = FMA(KP923879532, T64, T63);
Chris@82 157 T6j = FNMS(KP923879532, T64, T63);
Chris@82 158 }
Chris@82 159 }
/* Load stage: R1[WS(rs, k)], k = 31, 15, 7, 23, 19, 3, 11, 27 -> T2b, T3n, T4O, T5C, T2C, T3q, T4D, T5z. */
Chris@82 160 {
Chris@82 161 E T1Z, T2w, T22, T2x, T26, T2A, T29, T2z, T20, T21;
Chris@82 162 T1Z = R1[WS(rs, 31)];
Chris@82 163 T2w = R1[WS(rs, 15)];
Chris@82 164 T20 = R1[WS(rs, 7)];
Chris@82 165 T21 = R1[WS(rs, 23)];
Chris@82 166 T22 = T20 - T21;
Chris@82 167 T2x = T20 + T21;
Chris@82 168 {
Chris@82 169 E T24, T25, T27, T28;
Chris@82 170 T24 = R1[WS(rs, 19)];
Chris@82 171 T25 = R1[WS(rs, 3)];
Chris@82 172 T26 = FMA(KP414213562, T25, T24);
Chris@82 173 T2A = FNMS(KP414213562, T24, T25);
Chris@82 174 T27 = R1[WS(rs, 11)];
Chris@82 175 T28 = R1[WS(rs, 27)];
Chris@82 176 T29 = FMA(KP414213562, T28, T27);
Chris@82 177 T2z = FMS(KP414213562, T27, T28);
Chris@82 178 }
Chris@82 179 {
Chris@82 180 E T23, T2a, T4M, T4N;
Chris@82 181 T23 = FMA(KP707106781, T22, T1Z);
Chris@82 182 T2a = T26 - T29;
Chris@82 183 T2b = FMA(KP923879532, T2a, T23);
Chris@82 184 T3n = FNMS(KP923879532, T2a, T23);
Chris@82 185 T4M = FMA(KP707106781, T2x, T2w);
Chris@82 186 T4N = T26 + T29;
Chris@82 187 T4O = FNMS(KP923879532, T4N, T4M);
Chris@82 188 T5C = FMA(KP923879532, T4N, T4M);
Chris@82 189 }
Chris@82 190 {
Chris@82 191 E T2y, T2B, T4B, T4C;
Chris@82 192 T2y = FNMS(KP707106781, T2x, T2w);
Chris@82 193 T2B = T2z - T2A;
Chris@82 194 T2C = FNMS(KP923879532, T2B, T2y);
Chris@82 195 T3q = FMA(KP923879532, T2B, T2y);
Chris@82 196 T4B = FMS(KP707106781, T22, T1Z);
Chris@82 197 T4C = T2A + T2z;
Chris@82 198 T4D = FNMS(KP923879532, T4C, T4B);
Chris@82 199 T5z = FMA(KP923879532, T4C, T4B);
Chris@82 200 }
Chris@82 201 }
/* Load stage: R0[WS(rs, k)], k = 17, 1, 9, 25, 29, 13, 5, 21 -> TK, T3b, T4e, T5l, TR, T3c, T4b, T5k. */
Chris@82 202 {
Chris@82 203 E Ty, TL, TB, TM, TF, TP, TI, TO, Tz, TA;
Chris@82 204 Ty = R0[WS(rs, 17)];
Chris@82 205 TL = R0[WS(rs, 1)];
Chris@82 206 Tz = R0[WS(rs, 9)];
Chris@82 207 TA = R0[WS(rs, 25)];
Chris@82 208 TB = Tz + TA;
Chris@82 209 TM = Tz - TA;
Chris@82 210 {
Chris@82 211 E TD, TE, TG, TH;
Chris@82 212 TD = R0[WS(rs, 29)];
Chris@82 213 TE = R0[WS(rs, 13)];
Chris@82 214 TF = FMS(KP414213562, TE, TD);
Chris@82 215 TP = FMA(KP414213562, TD, TE);
Chris@82 216 TG = R0[WS(rs, 5)];
Chris@82 217 TH = R0[WS(rs, 21)];
Chris@82 218 TI = FNMS(KP414213562, TH, TG);
Chris@82 219 TO = FMA(KP414213562, TG, TH);
Chris@82 220 }
Chris@82 221 {
Chris@82 222 E TC, TJ, T4c, T4d;
Chris@82 223 TC = FNMS(KP707106781, TB, Ty);
Chris@82 224 TJ = TF - TI;
Chris@82 225 TK = FNMS(KP923879532, TJ, TC);
Chris@82 226 T3b = FMA(KP923879532, TJ, TC);
Chris@82 227 T4c = FMA(KP707106781, TM, TL);
Chris@82 228 T4d = TI + TF;
Chris@82 229 T4e = FNMS(KP923879532, T4d, T4c);
Chris@82 230 T5l = FMA(KP923879532, T4d, T4c);
Chris@82 231 }
Chris@82 232 {
Chris@82 233 E TN, TQ, T49, T4a;
Chris@82 234 TN = FNMS(KP707106781, TM, TL);
Chris@82 235 TQ = TO - TP;
Chris@82 236 TR = FNMS(KP923879532, TQ, TN);
Chris@82 237 T3c = FMA(KP923879532, TQ, TN);
Chris@82 238 T49 = FMA(KP707106781, TB, Ty);
Chris@82 239 T4a = TO + TP;
Chris@82 240 T4b = FNMS(KP923879532, T4a, T49);
Chris@82 241 T5k = FMA(KP923879532, T4a, T49);
Chris@82 242 }
Chris@82 243 }
/* Load stage: R0[WS(rs, k)], k = 15, 31, 7, 23, 3, 19, 27, 11 -> T15, T38, T47, T5o, T1c, T39, T44, T5n. */
Chris@82 244 {
Chris@82 245 E TT, T16, TW, T17, T10, T1a, T13, T19, TU, TV;
Chris@82 246 TT = R0[WS(rs, 15)];
Chris@82 247 T16 = R0[WS(rs, 31)];
Chris@82 248 TU = R0[WS(rs, 7)];
Chris@82 249 TV = R0[WS(rs, 23)];
Chris@82 250 TW = TU + TV;
Chris@82 251 T17 = TV - TU;
Chris@82 252 {
Chris@82 253 E TY, TZ, T11, T12;
Chris@82 254 TY = R0[WS(rs, 3)];
Chris@82 255 TZ = R0[WS(rs, 19)];
Chris@82 256 T10 = FMS(KP414213562, TZ, TY);
Chris@82 257 T1a = FMA(KP414213562, TY, TZ);
Chris@82 258 T11 = R0[WS(rs, 27)];
Chris@82 259 T12 = R0[WS(rs, 11)];
Chris@82 260 T13 = FNMS(KP414213562, T12, T11);
Chris@82 261 T19 = FMA(KP414213562, T11, T12);
Chris@82 262 }
Chris@82 263 {
Chris@82 264 E TX, T14, T45, T46;
Chris@82 265 TX = FNMS(KP707106781, TW, TT);
Chris@82 266 T14 = T10 - T13;
Chris@82 267 T15 = FNMS(KP923879532, T14, TX);
Chris@82 268 T38 = FMA(KP923879532, T14, TX);
Chris@82 269 T45 = FMA(KP707106781, T17, T16);
Chris@82 270 T46 = T10 + T13;
Chris@82 271 T47 = FNMS(KP923879532, T46, T45);
Chris@82 272 T5o = FMA(KP923879532, T46, T45);
Chris@82 273 }
Chris@82 274 {
Chris@82 275 E T18, T1b, T42, T43;
Chris@82 276 T18 = FNMS(KP707106781, T17, T16);
Chris@82 277 T1b = T19 - T1a;
Chris@82 278 T1c = FNMS(KP923879532, T1b, T18);
Chris@82 279 T39 = FMA(KP923879532, T1b, T18);
Chris@82 280 T42 = FMA(KP707106781, TW, TT);
Chris@82 281 T43 = T1a + T19;
Chris@82 282 T44 = FNMS(KP923879532, T43, T42);
Chris@82 283 T5n = FMA(KP923879532, T43, T42);
Chris@82 284 }
Chris@82 285 }
/* Load stage: R1[0] and R1[WS(rs, k)], k = 16, 8, 24, 20, 4, 12, 28 -> T1s, T3g, T4v, T5v, T1T, T3j, T4k, T5s. */
Chris@82 286 {
Chris@82 287 E T1g, T1N, T1j, T1O, T1n, T1R, T1q, T1Q, T1h, T1i;
Chris@82 288 T1g = R1[0];
Chris@82 289 T1N = R1[WS(rs, 16)];
Chris@82 290 T1h = R1[WS(rs, 8)];
Chris@82 291 T1i = R1[WS(rs, 24)];
Chris@82 292 T1j = T1h - T1i;
Chris@82 293 T1O = T1h + T1i;
Chris@82 294 {
Chris@82 295 E T1l, T1m, T1o, T1p;
Chris@82 296 T1l = R1[WS(rs, 20)];
Chris@82 297 T1m = R1[WS(rs, 4)];
Chris@82 298 T1n = FMA(KP414213562, T1m, T1l);
Chris@82 299 T1R = FNMS(KP414213562, T1l, T1m);
Chris@82 300 T1o = R1[WS(rs, 12)];
Chris@82 301 T1p = R1[WS(rs, 28)];
Chris@82 302 T1q = FMA(KP414213562, T1p, T1o);
Chris@82 303 T1Q = FMS(KP414213562, T1o, T1p);
Chris@82 304 }
Chris@82 305 {
Chris@82 306 E T1k, T1r, T4t, T4u;
Chris@82 307 T1k = FNMS(KP707106781, T1j, T1g);
Chris@82 308 T1r = T1n - T1q;
Chris@82 309 T1s = FNMS(KP923879532, T1r, T1k);
Chris@82 310 T3g = FMA(KP923879532, T1r, T1k);
Chris@82 311 T4t = FMA(KP707106781, T1O, T1N);
Chris@82 312 T4u = T1n + T1q;
Chris@82 313 T4v = FNMS(KP923879532, T4u, T4t);
Chris@82 314 T5v = FMA(KP923879532, T4u, T4t);
Chris@82 315 }
Chris@82 316 {
Chris@82 317 E T1P, T1S, T4i, T4j;
Chris@82 318 T1P = FNMS(KP707106781, T1O, T1N);
Chris@82 319 T1S = T1Q - T1R;
Chris@82 320 T1T = FNMS(KP923879532, T1S, T1P);
Chris@82 321 T3j = FMA(KP923879532, T1S, T1P);
Chris@82 322 T4i = FMA(KP707106781, T1j, T1g);
Chris@82 323 T4j = T1R + T1Q;
Chris@82 324 T4k = FNMS(KP923879532, T4j, T4i);
Chris@82 325 T5s = FMA(KP923879532, T4j, T4i);
Chris@82 326 }
Chris@82 327 }
/* Load stage: R1[WS(rs, k)], k = 13, 29, 5, 21, 17, 1, 9, 25 -> T2u, T3r, T4R, T5A, T2F, T3o, T4K, T5D. */
Chris@82 328 {
Chris@82 329 E T2g, T4I, T2j, T4H, T2p, T4F, T2s, T4E;
Chris@82 330 {
Chris@82 331 E T2c, T2h, T2f, T2i, T2d, T2e;
Chris@82 332 T2c = R1[WS(rs, 13)];
Chris@82 333 T2h = R1[WS(rs, 29)];
Chris@82 334 T2d = R1[WS(rs, 5)];
Chris@82 335 T2e = R1[WS(rs, 21)];
Chris@82 336 T2f = T2d + T2e;
Chris@82 337 T2i = T2d - T2e;
Chris@82 338 T2g = FNMS(KP707106781, T2f, T2c);
Chris@82 339 T4I = FMS(KP707106781, T2i, T2h);
Chris@82 340 T2j = FMA(KP707106781, T2i, T2h);
Chris@82 341 T4H = FMA(KP707106781, T2f, T2c);
Chris@82 342 }
Chris@82 343 {
Chris@82 344 E T2l, T2q, T2o, T2r, T2m, T2n;
Chris@82 345 T2l = R1[WS(rs, 17)];
Chris@82 346 T2q = R1[WS(rs, 1)];
Chris@82 347 T2m = R1[WS(rs, 9)];
Chris@82 348 T2n = R1[WS(rs, 25)];
Chris@82 349 T2o = T2m + T2n;
Chris@82 350 T2r = T2m - T2n;
Chris@82 351 T2p = FNMS(KP707106781, T2o, T2l);
Chris@82 352 T4F = FMA(KP707106781, T2r, T2q);
Chris@82 353 T2s = FNMS(KP707106781, T2r, T2q);
Chris@82 354 T4E = FMA(KP707106781, T2o, T2l);
Chris@82 355 }
Chris@82 356 {
Chris@82 357 E T2k, T2t, T4P, T4Q;
Chris@82 358 T2k = FNMS(KP668178637, T2j, T2g);
Chris@82 359 T2t = FNMS(KP668178637, T2s, T2p);
Chris@82 360 T2u = T2k - T2t;
Chris@82 361 T3r = T2t + T2k;
Chris@82 362 T4P = FMA(KP198912367, T4H, T4I);
Chris@82 363 T4Q = FNMS(KP198912367, T4E, T4F);
Chris@82 364 T4R = T4P - T4Q;
Chris@82 365 T5A = T4Q + T4P;
Chris@82 366 }
Chris@82 367 {
Chris@82 368 E T2D, T2E, T4G, T4J;
Chris@82 369 T2D = FMA(KP668178637, T2p, T2s);
Chris@82 370 T2E = FMA(KP668178637, T2g, T2j);
Chris@82 371 T2F = T2D + T2E;
Chris@82 372 T3o = T2D - T2E;
Chris@82 373 T4G = FMA(KP198912367, T4F, T4E);
Chris@82 374 T4J = FNMS(KP198912367, T4I, T4H);
Chris@82 375 T4K = T4G - T4J;
Chris@82 376 T5D = T4G + T4J;
Chris@82 377 }
Chris@82 378 }
/* Load stage: R1[WS(rs, k)], k = 14, 30, 6, 22, 18, 2, 10, 26 -> T1L, T3k, T4y, T5t, T1W, T3h, T4r, T5w. */
Chris@82 379 {
Chris@82 380 E T1x, T4p, T1A, T4o, T1G, T4m, T1J, T4l;
Chris@82 381 {
Chris@82 382 E T1t, T1y, T1w, T1z, T1u, T1v;
Chris@82 383 T1t = R1[WS(rs, 14)];
Chris@82 384 T1y = R1[WS(rs, 30)];
Chris@82 385 T1u = R1[WS(rs, 6)];
Chris@82 386 T1v = R1[WS(rs, 22)];
Chris@82 387 T1w = T1u + T1v;
Chris@82 388 T1z = T1u - T1v;
Chris@82 389 T1x = FNMS(KP707106781, T1w, T1t);
Chris@82 390 T4p = FMS(KP707106781, T1z, T1y);
Chris@82 391 T1A = FMA(KP707106781, T1z, T1y);
Chris@82 392 T4o = FMA(KP707106781, T1w, T1t);
Chris@82 393 }
Chris@82 394 {
Chris@82 395 E T1C, T1H, T1F, T1I, T1D, T1E;
Chris@82 396 T1C = R1[WS(rs, 18)];
Chris@82 397 T1H = R1[WS(rs, 2)];
Chris@82 398 T1D = R1[WS(rs, 10)];
Chris@82 399 T1E = R1[WS(rs, 26)];
Chris@82 400 T1F = T1D + T1E;
Chris@82 401 T1I = T1D - T1E;
Chris@82 402 T1G = FNMS(KP707106781, T1F, T1C);
Chris@82 403 T4m = FMA(KP707106781, T1I, T1H);
Chris@82 404 T1J = FNMS(KP707106781, T1I, T1H);
Chris@82 405 T4l = FMA(KP707106781, T1F, T1C);
Chris@82 406 }
Chris@82 407 {
Chris@82 408 E T1B, T1K, T4w, T4x;
Chris@82 409 T1B = FNMS(KP668178637, T1A, T1x);
Chris@82 410 T1K = FNMS(KP668178637, T1J, T1G);
Chris@82 411 T1L = T1B - T1K;
Chris@82 412 T3k = T1K + T1B;
Chris@82 413 T4w = FMA(KP198912367, T4o, T4p);
Chris@82 414 T4x = FNMS(KP198912367, T4l, T4m);
Chris@82 415 T4y = T4w - T4x;
Chris@82 416 T5t = T4x + T4w;
Chris@82 417 }
Chris@82 418 {
Chris@82 419 E T1U, T1V, T4n, T4q;
Chris@82 420 T1U = FMA(KP668178637, T1G, T1J);
Chris@82 421 T1V = FMA(KP668178637, T1x, T1A);
Chris@82 422 T1W = T1U + T1V;
Chris@82 423 T3h = T1U - T1V;
Chris@82 424 T4n = FMA(KP198912367, T4m, T4l);
Chris@82 425 T4q = FNMS(KP198912367, T4p, T4o);
Chris@82 426 T4r = T4n - T4q;
Chris@82 427 T5w = T4n + T4q;
Chris@82 428 }
Chris@82 429 }
/* Output stage 1 of 4: stores Cr and Ci at indices 2, 5, 10, 13, 18, 21, 26, 29. */
Chris@82 430 {
Chris@82 431 E Tx, T2N, T6P, T6V, T1e, T6Q, T2X, T31, T1Y, T2L, T2Q, T6W, T2U, T30, T2H;
Chris@82 432 E T2K, Tw, T6O;
Chris@82 433 Tw = Tm - Tv;
Chris@82 434 Tx = FNMS(KP831469612, Tw, Td);
Chris@82 435 T2N = FMA(KP831469612, Tw, Td);
Chris@82 436 T6O = T34 + T35;
Chris@82 437 T6P = FMA(KP831469612, T6O, T6N);
Chris@82 438 T6V = FNMS(KP831469612, T6O, T6N);
Chris@82 439 {
Chris@82 440 E TS, T1d, T2V, T2W;
Chris@82 441 TS = FMA(KP534511135, TR, TK);
Chris@82 442 T1d = FMA(KP534511135, T1c, T15);
Chris@82 443 T1e = TS - T1d;
Chris@82 444 T6Q = TS + T1d;
Chris@82 445 T2V = FNMS(KP831469612, T2u, T2b);
Chris@82 446 T2W = FMA(KP831469612, T2F, T2C);
Chris@82 447 T2X = FNMS(KP250486960, T2W, T2V);
Chris@82 448 T31 = FMA(KP250486960, T2V, T2W);
Chris@82 449 }
Chris@82 450 {
Chris@82 451 E T1M, T1X, T2O, T2P;
Chris@82 452 T1M = FNMS(KP831469612, T1L, T1s);
Chris@82 453 T1X = FNMS(KP831469612, T1W, T1T);
Chris@82 454 T1Y = FMA(KP599376933, T1X, T1M);
Chris@82 455 T2L = FNMS(KP599376933, T1M, T1X);
Chris@82 456 T2O = FNMS(KP534511135, TK, TR);
Chris@82 457 T2P = FNMS(KP534511135, T15, T1c);
Chris@82 458 T2Q = T2O - T2P;
Chris@82 459 T6W = T2O + T2P;
Chris@82 460 }
Chris@82 461 {
Chris@82 462 E T2S, T2T, T2v, T2G;
Chris@82 463 T2S = FMA(KP831469612, T1L, T1s);
Chris@82 464 T2T = FMA(KP831469612, T1W, T1T);
Chris@82 465 T2U = FNMS(KP250486960, T2T, T2S);
Chris@82 466 T30 = FMA(KP250486960, T2S, T2T);
Chris@82 467 T2v = FMA(KP831469612, T2u, T2b);
Chris@82 468 T2G = FNMS(KP831469612, T2F, T2C);
Chris@82 469 T2H = FMA(KP599376933, T2G, T2v);
Chris@82 470 T2K = FNMS(KP599376933, T2v, T2G);
Chris@82 471 }
Chris@82 472 {
Chris@82 473 E T1f, T2I, T6X, T6Y;
Chris@82 474 T1f = FMA(KP881921264, T1e, Tx);
Chris@82 475 T2I = T1Y - T2H;
Chris@82 476 Cr[WS(csr, 26)] = FNMS(KP857728610, T2I, T1f);
Chris@82 477 Cr[WS(csr, 5)] = FMA(KP857728610, T2I, T1f);
Chris@82 478 T6X = FNMS(KP881921264, T6W, T6V);
Chris@82 479 T6Y = T2L + T2K;
Chris@82 480 Ci[WS(csi, 26)] = FMS(KP857728610, T6Y, T6X);
Chris@82 481 Ci[WS(csi, 5)] = FMA(KP857728610, T6Y, T6X);
Chris@82 482 }
Chris@82 483 {
Chris@82 484 E T2J, T2M, T6Z, T70;
Chris@82 485 T2J = FNMS(KP881921264, T1e, Tx);
Chris@82 486 T2M = T2K - T2L;
Chris@82 487 Cr[WS(csr, 21)] = FNMS(KP857728610, T2M, T2J);
Chris@82 488 Cr[WS(csr, 10)] = FMA(KP857728610, T2M, T2J);
Chris@82 489 T6Z = FMA(KP881921264, T6W, T6V);
Chris@82 490 T70 = T1Y + T2H;
Chris@82 491 Ci[WS(csi, 10)] = -(FMA(KP857728610, T70, T6Z));
Chris@82 492 Ci[WS(csi, 21)] = FNMS(KP857728610, T70, T6Z);
Chris@82 493 }
Chris@82 494 {
Chris@82 495 E T2R, T2Y, T6R, T6S;
Chris@82 496 T2R = FMA(KP881921264, T2Q, T2N);
Chris@82 497 T2Y = T2U - T2X;
Chris@82 498 Cr[WS(csr, 29)] = FNMS(KP970031253, T2Y, T2R);
Chris@82 499 Cr[WS(csr, 2)] = FMA(KP970031253, T2Y, T2R);
Chris@82 500 T6R = FMA(KP881921264, T6Q, T6P);
Chris@82 501 T6S = T30 + T31;
Chris@82 502 Ci[WS(csi, 2)] = -(FMA(KP970031253, T6S, T6R));
Chris@82 503 Ci[WS(csi, 29)] = FNMS(KP970031253, T6S, T6R);
Chris@82 504 }
Chris@82 505 {
Chris@82 506 E T2Z, T32, T6T, T6U;
Chris@82 507 T2Z = FNMS(KP881921264, T2Q, T2N);
Chris@82 508 T32 = T30 - T31;
Chris@82 509 Cr[WS(csr, 18)] = FNMS(KP970031253, T32, T2Z);
Chris@82 510 Cr[WS(csr, 13)] = FMA(KP970031253, T32, T2Z);
Chris@82 511 T6T = FNMS(KP881921264, T6Q, T6P);
Chris@82 512 T6U = T2U + T2X;
Chris@82 513 Ci[WS(csi, 18)] = -(FMA(KP970031253, T6U, T6T));
Chris@82 514 Ci[WS(csi, 13)] = FNMS(KP970031253, T6U, T6T);
Chris@82 515 }
Chris@82 516 }
/* Output stage 2 of 4: stores Cr and Ci at indices 0, 7, 8, 15, 16, 23, 24, 31. */
Chris@82 517 {
Chris@82 518 E T5j, T5L, T67, T6d, T5q, T68, T5V, T5Z, T5y, T5J, T5O, T6e, T5S, T5Y, T5F;
Chris@82 519 E T5I, T5i, T66;
Chris@82 520 T5i = T5g - T5h;
Chris@82 521 T5j = FNMS(KP980785280, T5i, T5f);
Chris@82 522 T5L = FMA(KP980785280, T5i, T5f);
Chris@82 523 T66 = T3W + T3Z;
Chris@82 524 T67 = FMA(KP980785280, T66, T65);
Chris@82 525 T6d = FNMS(KP980785280, T66, T65);
Chris@82 526 {
Chris@82 527 E T5m, T5p, T5T, T5U;
Chris@82 528 T5m = FMA(KP098491403, T5l, T5k);
Chris@82 529 T5p = FMA(KP098491403, T5o, T5n);
Chris@82 530 T5q = T5m - T5p;
Chris@82 531 T68 = T5m + T5p;
Chris@82 532 T5T = FMA(KP980785280, T5A, T5z);
Chris@82 533 T5U = FMA(KP980785280, T5D, T5C);
Chris@82 534 T5V = FMA(KP049126849, T5U, T5T);
Chris@82 535 T5Z = FNMS(KP049126849, T5T, T5U);
Chris@82 536 }
Chris@82 537 {
Chris@82 538 E T5u, T5x, T5M, T5N;
Chris@82 539 T5u = FNMS(KP980785280, T5t, T5s);
Chris@82 540 T5x = FNMS(KP980785280, T5w, T5v);
Chris@82 541 T5y = FMA(KP906347169, T5x, T5u);
Chris@82 542 T5J = FNMS(KP906347169, T5u, T5x);
Chris@82 543 T5M = FNMS(KP098491403, T5k, T5l);
Chris@82 544 T5N = FNMS(KP098491403, T5n, T5o);
Chris@82 545 T5O = T5M - T5N;
Chris@82 546 T6e = T5M + T5N;
Chris@82 547 }
Chris@82 548 {
Chris@82 549 E T5Q, T5R, T5B, T5E;
Chris@82 550 T5Q = FMA(KP980785280, T5t, T5s);
Chris@82 551 T5R = FMA(KP980785280, T5w, T5v);
Chris@82 552 T5S = FNMS(KP049126849, T5R, T5Q);
Chris@82 553 T5Y = FMA(KP049126849, T5Q, T5R);
Chris@82 554 T5B = FNMS(KP980785280, T5A, T5z);
Chris@82 555 T5E = FNMS(KP980785280, T5D, T5C);
Chris@82 556 T5F = FNMS(KP906347169, T5E, T5B);
Chris@82 557 T5I = FMA(KP906347169, T5B, T5E);
Chris@82 558 }
Chris@82 559 {
Chris@82 560 E T5r, T5G, T6f, T6g;
Chris@82 561 T5r = FMA(KP995184726, T5q, T5j);
Chris@82 562 T5G = T5y + T5F;
Chris@82 563 Cr[WS(csr, 24)] = FNMS(KP740951125, T5G, T5r);
Chris@82 564 Cr[WS(csr, 7)] = FMA(KP740951125, T5G, T5r);
Chris@82 565 T6f = FNMS(KP995184726, T6e, T6d);
Chris@82 566 T6g = T5J + T5I;
Chris@82 567 Ci[WS(csi, 24)] = FMS(KP740951125, T6g, T6f);
Chris@82 568 Ci[WS(csi, 7)] = FMA(KP740951125, T6g, T6f);
Chris@82 569 }
Chris@82 570 {
Chris@82 571 E T5H, T5K, T6h, T6i;
Chris@82 572 T5H = FNMS(KP995184726, T5q, T5j);
Chris@82 573 T5K = T5I - T5J;
Chris@82 574 Cr[WS(csr, 23)] = FNMS(KP740951125, T5K, T5H);
Chris@82 575 Cr[WS(csr, 8)] = FMA(KP740951125, T5K, T5H);
Chris@82 576 T6h = FMA(KP995184726, T6e, T6d);
Chris@82 577 T6i = T5F - T5y;
Chris@82 578 Ci[WS(csi, 8)] = FMS(KP740951125, T6i, T6h);
Chris@82 579 Ci[WS(csi, 23)] = FMA(KP740951125, T6i, T6h);
Chris@82 580 }
Chris@82 581 {
Chris@82 582 E T5P, T5W, T69, T6a;
Chris@82 583 T5P = FMA(KP995184726, T5O, T5L);
Chris@82 584 T5W = T5S + T5V;
Chris@82 585 Cr[WS(csr, 31)] = FNMS(KP998795456, T5W, T5P);
Chris@82 586 Cr[0] = FMA(KP998795456, T5W, T5P);
Chris@82 587 T69 = FMA(KP995184726, T68, T67);
Chris@82 588 T6a = T5Y + T5Z;
Chris@82 589 Ci[0] = -(FMA(KP998795456, T6a, T69));
Chris@82 590 Ci[WS(csi, 31)] = FNMS(KP998795456, T6a, T69);
Chris@82 591 }
Chris@82 592 {
Chris@82 593 E T5X, T60, T6b, T6c;
Chris@82 594 T5X = FNMS(KP995184726, T5O, T5L);
Chris@82 595 T60 = T5Y - T5Z;
Chris@82 596 Cr[WS(csr, 16)] = FNMS(KP998795456, T60, T5X);
Chris@82 597 Cr[WS(csr, 15)] = FMA(KP998795456, T60, T5X);
Chris@82 598 T6b = FNMS(KP995184726, T68, T67);
Chris@82 599 T6c = T5V - T5S;
Chris@82 600 Ci[WS(csi, 16)] = FMS(KP998795456, T6c, T6b);
Chris@82 601 Ci[WS(csi, 15)] = FMA(KP998795456, T6c, T6b);
Chris@82 602 }
Chris@82 603 }
/* Output stage 3 of 4: stores Cr and Ci at indices 1, 6, 9, 14, 17, 22, 25, 30. */
Chris@82 604 {
Chris@82 605 E T37, T3z, T6B, T6H, T3e, T6C, T3J, T3M, T3m, T3w, T3C, T6I, T3G, T3N, T3t;
Chris@82 606 E T3x, T36, T6A;
Chris@82 607 T36 = T34 - T35;
Chris@82 608 T37 = FNMS(KP831469612, T36, T33);
Chris@82 609 T3z = FMA(KP831469612, T36, T33);
Chris@82 610 T6A = Tv + Tm;
Chris@82 611 T6B = FMA(KP831469612, T6A, T6z);
Chris@82 612 T6H = FNMS(KP831469612, T6A, T6z);
Chris@82 613 {
Chris@82 614 E T3a, T3d, T3H, T3I;
Chris@82 615 T3a = FNMS(KP303346683, T39, T38);
Chris@82 616 T3d = FNMS(KP303346683, T3c, T3b);
Chris@82 617 T3e = T3a - T3d;
Chris@82 618 T6C = T3d + T3a;
Chris@82 619 T3H = FNMS(KP831469612, T3o, T3n);
Chris@82 620 T3I = FMA(KP831469612, T3r, T3q);
Chris@82 621 T3J = FMA(KP148335987, T3I, T3H);
Chris@82 622 T3M = FNMS(KP148335987, T3H, T3I);
Chris@82 623 }
Chris@82 624 {
Chris@82 625 E T3i, T3l, T3A, T3B;
Chris@82 626 T3i = FNMS(KP831469612, T3h, T3g);
Chris@82 627 T3l = FNMS(KP831469612, T3k, T3j);
Chris@82 628 T3m = FNMS(KP741650546, T3l, T3i);
Chris@82 629 T3w = FMA(KP741650546, T3i, T3l);
Chris@82 630 T3A = FMA(KP303346683, T3b, T3c);
Chris@82 631 T3B = FMA(KP303346683, T38, T39);
Chris@82 632 T3C = T3A - T3B;
Chris@82 633 T6I = T3A + T3B;
Chris@82 634 }
Chris@82 635 {
Chris@82 636 E T3E, T3F, T3p, T3s;
Chris@82 637 T3E = FMA(KP831469612, T3h, T3g);
Chris@82 638 T3F = FMA(KP831469612, T3k, T3j);
Chris@82 639 T3G = FMA(KP148335987, T3F, T3E);
Chris@82 640 T3N = FNMS(KP148335987, T3E, T3F);
Chris@82 641 T3p = FMA(KP831469612, T3o, T3n);
Chris@82 642 T3s = FNMS(KP831469612, T3r, T3q);
Chris@82 643 T3t = FNMS(KP741650546, T3s, T3p);
Chris@82 644 T3x = FMA(KP741650546, T3p, T3s);
Chris@82 645 }
Chris@82 646 {
Chris@82 647 E T3f, T3u, T6J, T6K;
Chris@82 648 T3f = FMA(KP956940335, T3e, T37);
Chris@82 649 T3u = T3m - T3t;
Chris@82 650 Cr[WS(csr, 25)] = FNMS(KP803207531, T3u, T3f);
Chris@82 651 Cr[WS(csr, 6)] = FMA(KP803207531, T3u, T3f);
Chris@82 652 T6J = FMA(KP956940335, T6I, T6H);
Chris@82 653 T6K = T3w + T3x;
Chris@82 654 Ci[WS(csi, 6)] = -(FMA(KP803207531, T6K, T6J));
Chris@82 655 Ci[WS(csi, 25)] = FNMS(KP803207531, T6K, T6J);
Chris@82 656 }
Chris@82 657 {
Chris@82 658 E T3v, T3y, T6L, T6M;
Chris@82 659 T3v = FNMS(KP956940335, T3e, T37);
Chris@82 660 T3y = T3w - T3x;
Chris@82 661 Cr[WS(csr, 22)] = FNMS(KP803207531, T3y, T3v);
Chris@82 662 Cr[WS(csr, 9)] = FMA(KP803207531, T3y, T3v);
Chris@82 663 T6L = FNMS(KP956940335, T6I, T6H);
Chris@82 664 T6M = T3m + T3t;
Chris@82 665 Ci[WS(csi, 22)] = -(FMA(KP803207531, T6M, T6L));
Chris@82 666 Ci[WS(csi, 9)] = FNMS(KP803207531, T6M, T6L);
Chris@82 667 }
Chris@82 668 {
Chris@82 669 E T3D, T3K, T6D, T6E;
Chris@82 670 T3D = FMA(KP956940335, T3C, T3z);
Chris@82 671 T3K = T3G - T3J;
Chris@82 672 Cr[WS(csr, 30)] = FNMS(KP989176509, T3K, T3D);
Chris@82 673 Cr[WS(csr, 1)] = FMA(KP989176509, T3K, T3D);
Chris@82 674 T6D = FMA(KP956940335, T6C, T6B);
Chris@82 675 T6E = T3N + T3M;
Chris@82 676 Ci[WS(csi, 30)] = FMS(KP989176509, T6E, T6D);
Chris@82 677 Ci[WS(csi, 1)] = FMA(KP989176509, T6E, T6D);
Chris@82 678 }
Chris@82 679 {
Chris@82 680 E T3L, T3O, T6F, T6G;
Chris@82 681 T3L = FNMS(KP956940335, T3C, T3z);
Chris@82 682 T3O = T3M - T3N;
Chris@82 683 Cr[WS(csr, 17)] = FNMS(KP989176509, T3O, T3L);
Chris@82 684 Cr[WS(csr, 14)] = FMA(KP989176509, T3O, T3L);
Chris@82 685 T6F = FNMS(KP956940335, T6C, T6B);
Chris@82 686 T6G = T3G + T3J;
Chris@82 687 Ci[WS(csi, 14)] = -(FMA(KP989176509, T6G, T6F));
Chris@82 688 Ci[WS(csi, 17)] = FNMS(KP989176509, T6G, T6F);
Chris@82 689 }
Chris@82 690 }
/* Output stage 4 of 4: stores Cr and Ci at indices 3, 4, 11, 12, 19, 20, 27, 28. */
Chris@82 691 {
Chris@82 692 E T41, T4Z, T6l, T6r, T4g, T6m, T59, T5c, T4A, T4W, T52, T6s, T56, T5d, T4T;
Chris@82 693 E T4X, T40, T6k;
Chris@82 694 T40 = T3W - T3Z;
Chris@82 695 T41 = FNMS(KP980785280, T40, T3T);
Chris@82 696 T4Z = FMA(KP980785280, T40, T3T);
Chris@82 697 T6k = T5g + T5h;
Chris@82 698 T6l = FNMS(KP980785280, T6k, T6j);
Chris@82 699 T6r = FMA(KP980785280, T6k, T6j);
Chris@82 700 {
Chris@82 701 E T48, T4f, T57, T58;
Chris@82 702 T48 = FNMS(KP820678790, T47, T44);
Chris@82 703 T4f = FNMS(KP820678790, T4e, T4b);
Chris@82 704 T4g = T48 - T4f;
Chris@82 705 T6m = T4f + T48;
Chris@82 706 T57 = FMA(KP980785280, T4K, T4D);
Chris@82 707 T58 = FMA(KP980785280, T4R, T4O);
Chris@82 708 T59 = FNMS(KP357805721, T58, T57);
Chris@82 709 T5c = FMA(KP357805721, T57, T58);
Chris@82 710 }
Chris@82 711 {
Chris@82 712 E T4s, T4z, T50, T51;
Chris@82 713 T4s = FNMS(KP980785280, T4r, T4k);
Chris@82 714 T4z = FNMS(KP980785280, T4y, T4v);
Chris@82 715 T4A = FNMS(KP472964775, T4z, T4s);
Chris@82 716 T4W = FMA(KP472964775, T4s, T4z);
Chris@82 717 T50 = FMA(KP820678790, T4b, T4e);
Chris@82 718 T51 = FMA(KP820678790, T44, T47);
Chris@82 719 T52 = T50 - T51;
Chris@82 720 T6s = T50 + T51;
Chris@82 721 }
Chris@82 722 {
Chris@82 723 E T54, T55, T4L, T4S;
Chris@82 724 T54 = FMA(KP980785280, T4r, T4k);
Chris@82 725 T55 = FMA(KP980785280, T4y, T4v);
Chris@82 726 T56 = FMA(KP357805721, T55, T54);
Chris@82 727 T5d = FNMS(KP357805721, T54, T55);
Chris@82 728 T4L = FNMS(KP980785280, T4K, T4D);
Chris@82 729 T4S = FNMS(KP980785280, T4R, T4O);
Chris@82 730 T4T = FMA(KP472964775, T4S, T4L);
Chris@82 731 T4X = FNMS(KP472964775, T4L, T4S);
Chris@82 732 }
Chris@82 733 {
Chris@82 734 E T4h, T4U, T6t, T6u;
Chris@82 735 T4h = FMA(KP773010453, T4g, T41);
Chris@82 736 T4U = T4A + T4T;
Chris@82 737 Cr[WS(csr, 27)] = FNMS(KP903989293, T4U, T4h);
Chris@82 738 Cr[WS(csr, 4)] = FMA(KP903989293, T4U, T4h);
Chris@82 739 T6t = FMA(KP773010453, T6s, T6r);
Chris@82 740 T6u = T4W + T4X;
Chris@82 741 Ci[WS(csi, 4)] = -(FMA(KP903989293, T6u, T6t));
Chris@82 742 Ci[WS(csi, 27)] = FNMS(KP903989293, T6u, T6t);
Chris@82 743 }
Chris@82 744 {
Chris@82 745 E T4V, T4Y, T6v, T6w;
Chris@82 746 T4V = FNMS(KP773010453, T4g, T41);
Chris@82 747 T4Y = T4W - T4X;
Chris@82 748 Cr[WS(csr, 20)] = FNMS(KP903989293, T4Y, T4V);
Chris@82 749 Cr[WS(csr, 11)] = FMA(KP903989293, T4Y, T4V);
Chris@82 750 T6v = FNMS(KP773010453, T6s, T6r);
Chris@82 751 T6w = T4T - T4A;
Chris@82 752 Ci[WS(csi, 20)] = FMS(KP903989293, T6w, T6v);
Chris@82 753 Ci[WS(csi, 11)] = FMA(KP903989293, T6w, T6v);
Chris@82 754 }
Chris@82 755 {
Chris@82 756 E T53, T5a, T6n, T6o;
Chris@82 757 T53 = FMA(KP773010453, T52, T4Z);
Chris@82 758 T5a = T56 + T59;
Chris@82 759 Cr[WS(csr, 28)] = FNMS(KP941544065, T5a, T53);
Chris@82 760 Cr[WS(csr, 3)] = FMA(KP941544065, T5a, T53);
Chris@82 761 T6n = FMA(KP773010453, T6m, T6l);
Chris@82 762 T6o = T5d + T5c;
Chris@82 763 Ci[WS(csi, 28)] = FMS(KP941544065, T6o, T6n);
Chris@82 764 Ci[WS(csi, 3)] = FMA(KP941544065, T6o, T6n);
Chris@82 765 }
Chris@82 766 {
Chris@82 767 E T5b, T5e, T6p, T6q;
Chris@82 768 T5b = FNMS(KP773010453, T52, T4Z);
Chris@82 769 T5e = T5c - T5d;
Chris@82 770 Cr[WS(csr, 19)] = FNMS(KP941544065, T5e, T5b);
Chris@82 771 Cr[WS(csr, 12)] = FMA(KP941544065, T5e, T5b);
Chris@82 772 T6p = FNMS(KP773010453, T6m, T6l);
Chris@82 773 T6q = T59 - T56;
Chris@82 774 Ci[WS(csi, 12)] = FMS(KP941544065, T6q, T6p);
Chris@82 775 Ci[WS(csi, 19)] = FMA(KP941544065, T6q, T6p);
Chris@82 776 }
Chris@82 777 }
Chris@82 778 }
Chris@82 779 }
Chris@82 780 }
Chris@82 781
/*
 * Descriptor passed along with the FMA-variant kernel when it is registered
 * in codelet_r2cfII_64. It carries the value 64, the kernel name string, an
 * operation-count tuple and a pointer to GENUS. The first and third tuple
 * entries (114, 320) match the addition and fused multiply/add counts quoted
 * in this variant's operation-count comment.
 * NOTE(review): the field meanings are defined by kr2c_desc, declared in a
 * header not shown here; 64 is presumably the transform size -- confirm.
 */
Chris@82 782 static const kr2c_desc desc = { 64, "r2cfII_64", {114, 0, 320, 0}, &GENUS };
Chris@82 783
/*
 * Registration entry point for the FMA-variant kernel: forwards the planner
 * p, the r2cfII_64 function and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined elsewhere; presumably it applies the
 * library's symbol prefix -- confirm in the FFTW headers.
 */
Chris@82 784 void X(codelet_r2cfII_64) (planner *p) {
Chris@82 785 X(kr2c_register) (p, r2cfII_64, &desc);
Chris@82 786 }
Chris@82 787
Chris@82 788 #else
Chris@82 789
Chris@82 790 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cfII_64 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 791
Chris@82 792 /*
Chris@82 793 * This function contains 434 FP additions, 206 FP multiplications,
Chris@82 794 * (or, 342 additions, 114 multiplications, 92 fused multiply/add),
Chris@82 795 * 118 stack variables, 31 constants, and 128 memory accesses
Chris@82 796 */
Chris@82 797 #include "rdft/scalar/r2cfII.h"
Chris@82 798
/*
 * r2cfII_64, non-FMA variant: this definition sits in the #else branch, so it
 * is the one compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each loop iteration reads 32 values from R0 and 32 from R1 (positions
 * WS(rs, 0..31); position 0 is indexed directly as R0[0] / R1[0]) and stores
 * 32 values to Cr (positions WS(csr, 0..31)) and 32 to Ci (positions
 * WS(csi, 0..31)).  The loop runs v times; after each pass R0 and R1 advance
 * by ivs and Cr and Ci advance by ovs.
 *
 * The body is generated straight-line arithmetic.  Floating-point results
 * depend on the exact operation order, so do not reorder or hand-simplify it.
 * NOTE(review): the generator flags (-n 64 -dft-II) suggest a size-64
 * real-input "type II" forward transform; confirm against the FFTW codelet
 * documentation before relying on that description.
 */
Chris@82 799 static void r2cfII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 800 {
Chris@82 801 DK(KP242980179, +0.242980179903263889948274162077471118320990783);
Chris@82 802 DK(KP970031253, +0.970031253194543992603984207286100251456865962);
Chris@82 803 DK(KP857728610, +0.857728610000272069902269984284770137042490799);
Chris@82 804 DK(KP514102744, +0.514102744193221726593693838968815772608049120);
Chris@82 805 DK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@82 806 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@82 807 DK(KP427555093, +0.427555093430282094320966856888798534304578629);
Chris@82 808 DK(KP903989293, +0.903989293123443331586200297230537048710132025);
Chris@82 809 DK(KP336889853, +0.336889853392220050689253212619147570477766780);
Chris@82 810 DK(KP941544065, +0.941544065183020778412509402599502357185589796);
Chris@82 811 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@82 812 DK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@82 813 DK(KP595699304, +0.595699304492433343467036528829969889511926338);
Chris@82 814 DK(KP803207531, +0.803207531480644909806676512963141923879569427);
Chris@82 815 DK(KP146730474, +0.146730474455361751658850129646717819706215317);
Chris@82 816 DK(KP989176509, +0.989176509964780973451673738016243063983689533);
Chris@82 817 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@82 818 DK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@82 819 DK(KP049067674, +0.049067674327418014254954976942682658314745363);
Chris@82 820 DK(KP998795456, +0.998795456205172392714771604759100694443203615);
Chris@82 821 DK(KP671558954, +0.671558954847018400625376850427421803228750632);
Chris@82 822 DK(KP740951125, +0.740951125354959091175616897495162729728955309);
Chris@82 823 DK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@82 824 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@82 825 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 826 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 827 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 828 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 829 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 830 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 831 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* 1/sqrt(2) to the printed precision */
Chris@82 832 {
Chris@82 833 INT i;
Chris@82 834 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
Chris@82 835 E Tm, T34, T3Z, T5g, Tv, T35, T3W, T5h, Td, T33, T6B, T6Q, T3T, T5f, T68; /* intermediates set by the input blocks and consumed by the four store blocks */
Chris@82 836 E T6m, T2b, T3n, T4O, T5D, T2F, T3r, T4K, T5z, TK, T3c, T47, T5n, TR, T3b;
Chris@82 837 E T44, T5o, T15, T38, T4e, T5l, T1c, T39, T4b, T5k, T1s, T3g, T4v, T5w, T1W;
Chris@82 838 E T3k, T4k, T5s, T2u, T3q, T4R, T5A, T2y, T3o, T4H, T5C, T1L, T3j, T4y, T5t;
Chris@82 839 E T1P, T3h, T4r, T5v;
Chris@82 840 { /* R0[2], R0[10], R0[18], R0[26] -> Tm, T34, T3Z, T5g */
Chris@82 841 E Te, Tk, Th, Tj, Tf, Tg;
Chris@82 842 Te = R0[WS(rs, 2)];
Chris@82 843 Tk = R0[WS(rs, 18)];
Chris@82 844 Tf = R0[WS(rs, 10)];
Chris@82 845 Tg = R0[WS(rs, 26)];
Chris@82 846 Th = KP707106781 * (Tf - Tg);
Chris@82 847 Tj = KP707106781 * (Tf + Tg);
Chris@82 848 {
Chris@82 849 E Ti, Tl, T3X, T3Y;
Chris@82 850 Ti = Te + Th;
Chris@82 851 Tl = Tj + Tk;
Chris@82 852 Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
Chris@82 853 T34 = FMA(KP195090322, Ti, KP980785280 * Tl);
Chris@82 854 T3X = Tk - Tj;
Chris@82 855 T3Y = Te - Th;
Chris@82 856 T3Z = FNMS(KP555570233, T3Y, KP831469612 * T3X);
Chris@82 857 T5g = FMA(KP831469612, T3Y, KP555570233 * T3X);
Chris@82 858 }
Chris@82 859 }
Chris@82 860 { /* R0[6], R0[14], R0[22], R0[30] -> Tv, T35, T3W, T5h */
Chris@82 861 E Tq, Tt, Tp, Ts, Tn, To;
Chris@82 862 Tq = R0[WS(rs, 30)];
Chris@82 863 Tt = R0[WS(rs, 14)];
Chris@82 864 Tn = R0[WS(rs, 6)];
Chris@82 865 To = R0[WS(rs, 22)];
Chris@82 866 Tp = KP707106781 * (Tn - To);
Chris@82 867 Ts = KP707106781 * (Tn + To);
Chris@82 868 {
Chris@82 869 E Tr, Tu, T3U, T3V;
Chris@82 870 Tr = Tp - Tq;
Chris@82 871 Tu = Ts + Tt;
Chris@82 872 Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
Chris@82 873 T35 = FNMS(KP980785280, Tu, KP195090322 * Tr);
Chris@82 874 T3U = Tt - Ts;
Chris@82 875 T3V = Tp + Tq;
Chris@82 876 T3W = FNMS(KP555570233, T3V, KP831469612 * T3U);
Chris@82 877 T5h = FMA(KP831469612, T3V, KP555570233 * T3U);
Chris@82 878 }
Chris@82 879 }
Chris@82 880 { /* R0[0], R0[4], ..., R0[28] -> Td, T33, T6B, T6Q, T3T, T5f, T68, T6m */
Chris@82 881 E T1, T66, T4, T65, T8, T3Q, Tb, T3R, T2, T3;
Chris@82 882 T1 = R0[0];
Chris@82 883 T66 = R0[WS(rs, 16)];
Chris@82 884 T2 = R0[WS(rs, 8)];
Chris@82 885 T3 = R0[WS(rs, 24)];
Chris@82 886 T4 = KP707106781 * (T2 - T3);
Chris@82 887 T65 = KP707106781 * (T2 + T3);
Chris@82 888 {
Chris@82 889 E T6, T7, T9, Ta;
Chris@82 890 T6 = R0[WS(rs, 4)];
Chris@82 891 T7 = R0[WS(rs, 20)];
Chris@82 892 T8 = FNMS(KP382683432, T7, KP923879532 * T6);
Chris@82 893 T3Q = FMA(KP382683432, T6, KP923879532 * T7);
Chris@82 894 T9 = R0[WS(rs, 12)];
Chris@82 895 Ta = R0[WS(rs, 28)];
Chris@82 896 Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
Chris@82 897 T3R = FMA(KP923879532, T9, KP382683432 * Ta);
Chris@82 898 }
Chris@82 899 {
Chris@82 900 E T5, Tc, T6z, T6A;
Chris@82 901 T5 = T1 + T4;
Chris@82 902 Tc = T8 + Tb;
Chris@82 903 Td = T5 + Tc;
Chris@82 904 T33 = T5 - Tc;
Chris@82 905 T6z = Tb - T8;
Chris@82 906 T6A = T66 - T65;
Chris@82 907 T6B = T6z - T6A;
Chris@82 908 T6Q = T6z + T6A;
Chris@82 909 }
Chris@82 910 {
Chris@82 911 E T3P, T3S, T64, T67;
Chris@82 912 T3P = T1 - T4;
Chris@82 913 T3S = T3Q - T3R;
Chris@82 914 T3T = T3P - T3S;
Chris@82 915 T5f = T3P + T3S;
Chris@82 916 T64 = T3Q + T3R;
Chris@82 917 T67 = T65 + T66;
Chris@82 918 T68 = T64 + T67;
Chris@82 919 T6m = T67 - T64;
Chris@82 920 }
Chris@82 921 }
Chris@82 922 { /* R1[3], R1[7], ..., R1[31] -> T2b, T3n, T4O, T5D, T2F, T3r, T4K, T5z */
Chris@82 923 E T22, T2D, T21, T2C, T26, T2z, T29, T2A, T1Z, T20;
Chris@82 924 T22 = R1[WS(rs, 31)];
Chris@82 925 T2D = R1[WS(rs, 15)];
Chris@82 926 T1Z = R1[WS(rs, 7)];
Chris@82 927 T20 = R1[WS(rs, 23)];
Chris@82 928 T21 = KP707106781 * (T1Z - T20);
Chris@82 929 T2C = KP707106781 * (T1Z + T20);
Chris@82 930 {
Chris@82 931 E T24, T25, T27, T28;
Chris@82 932 T24 = R1[WS(rs, 3)];
Chris@82 933 T25 = R1[WS(rs, 19)];
Chris@82 934 T26 = FNMS(KP382683432, T25, KP923879532 * T24);
Chris@82 935 T2z = FMA(KP382683432, T24, KP923879532 * T25);
Chris@82 936 T27 = R1[WS(rs, 11)];
Chris@82 937 T28 = R1[WS(rs, 27)];
Chris@82 938 T29 = FNMS(KP923879532, T28, KP382683432 * T27);
Chris@82 939 T2A = FMA(KP923879532, T27, KP382683432 * T28);
Chris@82 940 }
Chris@82 941 {
Chris@82 942 E T23, T2a, T4M, T4N;
Chris@82 943 T23 = T21 - T22;
Chris@82 944 T2a = T26 + T29;
Chris@82 945 T2b = T23 + T2a;
Chris@82 946 T3n = T23 - T2a;
Chris@82 947 T4M = T29 - T26;
Chris@82 948 T4N = T2D - T2C;
Chris@82 949 T4O = T4M - T4N;
Chris@82 950 T5D = T4M + T4N;
Chris@82 951 }
Chris@82 952 {
Chris@82 953 E T2B, T2E, T4I, T4J;
Chris@82 954 T2B = T2z + T2A;
Chris@82 955 T2E = T2C + T2D;
Chris@82 956 T2F = T2B + T2E;
Chris@82 957 T3r = T2E - T2B;
Chris@82 958 T4I = T21 + T22;
Chris@82 959 T4J = T2z - T2A;
Chris@82 960 T4K = T4I + T4J;
Chris@82 961 T5z = T4J - T4I;
Chris@82 962 }
Chris@82 963 }
Chris@82 964 { /* R0[1], R0[5], ..., R0[29] -> TK, T3c, T47, T5n, TR, T3b, T44, T5o */
Chris@82 965 E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
Chris@82 966 Ty = R0[WS(rs, 1)];
Chris@82 967 TP = R0[WS(rs, 17)];
Chris@82 968 Tz = R0[WS(rs, 9)];
Chris@82 969 TA = R0[WS(rs, 25)];
Chris@82 970 TB = KP707106781 * (Tz - TA);
Chris@82 971 TO = KP707106781 * (Tz + TA);
Chris@82 972 {
Chris@82 973 E TD, TE, TG, TH;
Chris@82 974 TD = R0[WS(rs, 5)];
Chris@82 975 TE = R0[WS(rs, 21)];
Chris@82 976 TF = FNMS(KP382683432, TE, KP923879532 * TD);
Chris@82 977 TL = FMA(KP382683432, TD, KP923879532 * TE);
Chris@82 978 TG = R0[WS(rs, 13)];
Chris@82 979 TH = R0[WS(rs, 29)];
Chris@82 980 TI = FNMS(KP923879532, TH, KP382683432 * TG);
Chris@82 981 TM = FMA(KP923879532, TG, KP382683432 * TH);
Chris@82 982 }
Chris@82 983 {
Chris@82 984 E TC, TJ, T45, T46;
Chris@82 985 TC = Ty + TB;
Chris@82 986 TJ = TF + TI;
Chris@82 987 TK = TC + TJ;
Chris@82 988 T3c = TC - TJ;
Chris@82 989 T45 = TI - TF;
Chris@82 990 T46 = TP - TO;
Chris@82 991 T47 = T45 - T46;
Chris@82 992 T5n = T45 + T46;
Chris@82 993 }
Chris@82 994 {
Chris@82 995 E TN, TQ, T42, T43;
Chris@82 996 TN = TL + TM;
Chris@82 997 TQ = TO + TP;
Chris@82 998 TR = TN + TQ;
Chris@82 999 T3b = TQ - TN;
Chris@82 1000 T42 = Ty - TB;
Chris@82 1001 T43 = TL - TM;
Chris@82 1002 T44 = T42 - T43;
Chris@82 1003 T5o = T42 + T43;
Chris@82 1004 }
Chris@82 1005 }
Chris@82 1006 { /* R0[3], R0[7], ..., R0[31] -> T15, T38, T4e, T5l, T1c, T39, T4b, T5k */
Chris@82 1007 E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
Chris@82 1008 TW = R0[WS(rs, 31)];
Chris@82 1009 T1a = R0[WS(rs, 15)];
Chris@82 1010 TT = R0[WS(rs, 7)];
Chris@82 1011 TU = R0[WS(rs, 23)];
Chris@82 1012 TV = KP707106781 * (TT - TU);
Chris@82 1013 T19 = KP707106781 * (TT + TU);
Chris@82 1014 {
Chris@82 1015 E TY, TZ, T11, T12;
Chris@82 1016 TY = R0[WS(rs, 3)];
Chris@82 1017 TZ = R0[WS(rs, 19)];
Chris@82 1018 T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
Chris@82 1019 T16 = FMA(KP382683432, TY, KP923879532 * TZ);
Chris@82 1020 T11 = R0[WS(rs, 11)];
Chris@82 1021 T12 = R0[WS(rs, 27)];
Chris@82 1022 T13 = FNMS(KP923879532, T12, KP382683432 * T11);
Chris@82 1023 T17 = FMA(KP923879532, T11, KP382683432 * T12);
Chris@82 1024 }
Chris@82 1025 {
Chris@82 1026 E TX, T14, T4c, T4d;
Chris@82 1027 TX = TV - TW;
Chris@82 1028 T14 = T10 + T13;
Chris@82 1029 T15 = TX + T14;
Chris@82 1030 T38 = TX - T14;
Chris@82 1031 T4c = T13 - T10;
Chris@82 1032 T4d = T1a - T19;
Chris@82 1033 T4e = T4c - T4d;
Chris@82 1034 T5l = T4c + T4d;
Chris@82 1035 }
Chris@82 1036 {
Chris@82 1037 E T18, T1b, T49, T4a;
Chris@82 1038 T18 = T16 + T17;
Chris@82 1039 T1b = T19 + T1a;
Chris@82 1040 T1c = T18 + T1b;
Chris@82 1041 T39 = T1b - T18;
Chris@82 1042 T49 = TV + TW;
Chris@82 1043 T4a = T16 - T17;
Chris@82 1044 T4b = T49 + T4a;
Chris@82 1045 T5k = T4a - T49;
Chris@82 1046 }
Chris@82 1047 }
Chris@82 1048 { /* R1[0], R1[4], ..., R1[28] -> T1s, T3g, T4v, T5w, T1W, T3k, T4k, T5s */
Chris@82 1049 E T1g, T1U, T1j, T1T, T1n, T1Q, T1q, T1R, T1h, T1i;
Chris@82 1050 T1g = R1[0];
Chris@82 1051 T1U = R1[WS(rs, 16)];
Chris@82 1052 T1h = R1[WS(rs, 8)];
Chris@82 1053 T1i = R1[WS(rs, 24)];
Chris@82 1054 T1j = KP707106781 * (T1h - T1i);
Chris@82 1055 T1T = KP707106781 * (T1h + T1i);
Chris@82 1056 {
Chris@82 1057 E T1l, T1m, T1o, T1p;
Chris@82 1058 T1l = R1[WS(rs, 4)];
Chris@82 1059 T1m = R1[WS(rs, 20)];
Chris@82 1060 T1n = FNMS(KP382683432, T1m, KP923879532 * T1l);
Chris@82 1061 T1Q = FMA(KP382683432, T1l, KP923879532 * T1m);
Chris@82 1062 T1o = R1[WS(rs, 12)];
Chris@82 1063 T1p = R1[WS(rs, 28)];
Chris@82 1064 T1q = FNMS(KP923879532, T1p, KP382683432 * T1o);
Chris@82 1065 T1R = FMA(KP923879532, T1o, KP382683432 * T1p);
Chris@82 1066 }
Chris@82 1067 {
Chris@82 1068 E T1k, T1r, T4t, T4u;
Chris@82 1069 T1k = T1g + T1j;
Chris@82 1070 T1r = T1n + T1q;
Chris@82 1071 T1s = T1k + T1r;
Chris@82 1072 T3g = T1k - T1r;
Chris@82 1073 T4t = T1q - T1n;
Chris@82 1074 T4u = T1U - T1T;
Chris@82 1075 T4v = T4t - T4u;
Chris@82 1076 T5w = T4t + T4u;
Chris@82 1077 }
Chris@82 1078 {
Chris@82 1079 E T1S, T1V, T4i, T4j;
Chris@82 1080 T1S = T1Q + T1R;
Chris@82 1081 T1V = T1T + T1U;
Chris@82 1082 T1W = T1S + T1V;
Chris@82 1083 T3k = T1V - T1S;
Chris@82 1084 T4i = T1g - T1j;
Chris@82 1085 T4j = T1Q - T1R;
Chris@82 1086 T4k = T4i - T4j;
Chris@82 1087 T5s = T4i + T4j;
Chris@82 1088 }
Chris@82 1089 }
Chris@82 1090 { /* R1[1], R1[5], ..., R1[29] -> T2u, T3q, T4R, T5A, T2y, T3o, T4H, T5C */
Chris@82 1091 E T2g, T4F, T2j, T4E, T2p, T4C, T2s, T4B;
Chris@82 1092 {
Chris@82 1093 E T2c, T2i, T2f, T2h, T2d, T2e;
Chris@82 1094 T2c = R1[WS(rs, 1)];
Chris@82 1095 T2i = R1[WS(rs, 17)];
Chris@82 1096 T2d = R1[WS(rs, 9)];
Chris@82 1097 T2e = R1[WS(rs, 25)];
Chris@82 1098 T2f = KP707106781 * (T2d - T2e);
Chris@82 1099 T2h = KP707106781 * (T2d + T2e);
Chris@82 1100 T2g = T2c + T2f;
Chris@82 1101 T4F = T2c - T2f;
Chris@82 1102 T2j = T2h + T2i;
Chris@82 1103 T4E = T2i - T2h;
Chris@82 1104 }
Chris@82 1105 {
Chris@82 1106 E T2o, T2r, T2n, T2q, T2l, T2m;
Chris@82 1107 T2o = R1[WS(rs, 29)];
Chris@82 1108 T2r = R1[WS(rs, 13)];
Chris@82 1109 T2l = R1[WS(rs, 5)];
Chris@82 1110 T2m = R1[WS(rs, 21)];
Chris@82 1111 T2n = KP707106781 * (T2l - T2m);
Chris@82 1112 T2q = KP707106781 * (T2l + T2m);
Chris@82 1113 T2p = T2n - T2o;
Chris@82 1114 T4C = T2n + T2o;
Chris@82 1115 T2s = T2q + T2r;
Chris@82 1116 T4B = T2r - T2q;
Chris@82 1117 }
Chris@82 1118 {
Chris@82 1119 E T2k, T2t, T4P, T4Q;
Chris@82 1120 T2k = FNMS(KP195090322, T2j, KP980785280 * T2g);
Chris@82 1121 T2t = FMA(KP980785280, T2p, KP195090322 * T2s);
Chris@82 1122 T2u = T2k + T2t;
Chris@82 1123 T3q = T2t - T2k;
Chris@82 1124 T4P = FMA(KP831469612, T4F, KP555570233 * T4E);
Chris@82 1125 T4Q = FMA(KP831469612, T4C, KP555570233 * T4B);
Chris@82 1126 T4R = T4P + T4Q;
Chris@82 1127 T5A = T4P - T4Q;
Chris@82 1128 }
Chris@82 1129 {
Chris@82 1130 E T2w, T2x, T4D, T4G;
Chris@82 1131 T2w = FNMS(KP980785280, T2s, KP195090322 * T2p);
Chris@82 1132 T2x = FMA(KP195090322, T2g, KP980785280 * T2j);
Chris@82 1133 T2y = T2w - T2x;
Chris@82 1134 T3o = T2x + T2w;
Chris@82 1135 T4D = FNMS(KP555570233, T4C, KP831469612 * T4B);
Chris@82 1136 T4G = FNMS(KP555570233, T4F, KP831469612 * T4E);
Chris@82 1137 T4H = T4D - T4G;
Chris@82 1138 T5C = T4G + T4D;
Chris@82 1139 }
Chris@82 1140 }
Chris@82 1141 { /* R1[2], R1[6], ..., R1[30] -> T1L, T3j, T4y, T5t, T1P, T3h, T4r, T5v */
Chris@82 1142 E T1x, T4p, T1A, T4o, T1G, T4m, T1J, T4l;
Chris@82 1143 {
Chris@82 1144 E T1t, T1z, T1w, T1y, T1u, T1v;
Chris@82 1145 T1t = R1[WS(rs, 2)];
Chris@82 1146 T1z = R1[WS(rs, 18)];
Chris@82 1147 T1u = R1[WS(rs, 10)];
Chris@82 1148 T1v = R1[WS(rs, 26)];
Chris@82 1149 T1w = KP707106781 * (T1u - T1v);
Chris@82 1150 T1y = KP707106781 * (T1u + T1v);
Chris@82 1151 T1x = T1t + T1w;
Chris@82 1152 T4p = T1t - T1w;
Chris@82 1153 T1A = T1y + T1z;
Chris@82 1154 T4o = T1z - T1y;
Chris@82 1155 }
Chris@82 1156 {
Chris@82 1157 E T1F, T1I, T1E, T1H, T1C, T1D;
Chris@82 1158 T1F = R1[WS(rs, 30)];
Chris@82 1159 T1I = R1[WS(rs, 14)];
Chris@82 1160 T1C = R1[WS(rs, 6)];
Chris@82 1161 T1D = R1[WS(rs, 22)];
Chris@82 1162 T1E = KP707106781 * (T1C - T1D);
Chris@82 1163 T1H = KP707106781 * (T1C + T1D);
Chris@82 1164 T1G = T1E - T1F;
Chris@82 1165 T4m = T1E + T1F;
Chris@82 1166 T1J = T1H + T1I;
Chris@82 1167 T4l = T1I - T1H;
Chris@82 1168 }
Chris@82 1169 {
Chris@82 1170 E T1B, T1K, T4w, T4x;
Chris@82 1171 T1B = FNMS(KP195090322, T1A, KP980785280 * T1x);
Chris@82 1172 T1K = FMA(KP980785280, T1G, KP195090322 * T1J);
Chris@82 1173 T1L = T1B + T1K;
Chris@82 1174 T3j = T1K - T1B;
Chris@82 1175 T4w = FMA(KP831469612, T4p, KP555570233 * T4o);
Chris@82 1176 T4x = FMA(KP831469612, T4m, KP555570233 * T4l);
Chris@82 1177 T4y = T4w + T4x;
Chris@82 1178 T5t = T4w - T4x;
Chris@82 1179 }
Chris@82 1180 {
Chris@82 1181 E T1N, T1O, T4n, T4q;
Chris@82 1182 T1N = FNMS(KP980785280, T1J, KP195090322 * T1G);
Chris@82 1183 T1O = FMA(KP195090322, T1x, KP980785280 * T1A);
Chris@82 1184 T1P = T1N - T1O;
Chris@82 1185 T3h = T1O + T1N;
Chris@82 1186 T4n = FNMS(KP555570233, T4m, KP831469612 * T4l);
Chris@82 1187 T4q = FNMS(KP555570233, T4p, KP831469612 * T4o);
Chris@82 1188 T4r = T4n - T4q;
Chris@82 1189 T5v = T4q + T4n;
Chris@82 1190 }
Chris@82 1191 }
Chris@82 1192 { /* stores Cr and Ci at positions 0, 7, 8, 15, 16, 23, 24, 31 */
Chris@82 1193 E Tx, T2N, T69, T6f, T1e, T6e, T2X, T30, T1Y, T2L, T2Q, T62, T2U, T31, T2H;
Chris@82 1194 E T2K, Tw, T63;
Chris@82 1195 Tw = Tm + Tv;
Chris@82 1196 Tx = Td + Tw;
Chris@82 1197 T2N = Td - Tw;
Chris@82 1198 T63 = T35 - T34;
Chris@82 1199 T69 = T63 - T68;
Chris@82 1200 T6f = T63 + T68;
Chris@82 1201 {
Chris@82 1202 E TS, T1d, T2V, T2W;
Chris@82 1203 TS = FNMS(KP098017140, TR, KP995184726 * TK);
Chris@82 1204 T1d = FMA(KP995184726, T15, KP098017140 * T1c);
Chris@82 1205 T1e = TS + T1d;
Chris@82 1206 T6e = T1d - TS;
Chris@82 1207 T2V = T2b - T2u;
Chris@82 1208 T2W = T2y + T2F;
Chris@82 1209 T2X = FNMS(KP671558954, T2W, KP740951125 * T2V);
Chris@82 1210 T30 = FMA(KP671558954, T2V, KP740951125 * T2W);
Chris@82 1211 }
Chris@82 1212 {
Chris@82 1213 E T1M, T1X, T2O, T2P;
Chris@82 1214 T1M = T1s + T1L;
Chris@82 1215 T1X = T1P - T1W;
Chris@82 1216 T1Y = FMA(KP998795456, T1M, KP049067674 * T1X);
Chris@82 1217 T2L = FNMS(KP049067674, T1M, KP998795456 * T1X);
Chris@82 1218 T2O = FMA(KP098017140, TK, KP995184726 * TR);
Chris@82 1219 T2P = FNMS(KP995184726, T1c, KP098017140 * T15);
Chris@82 1220 T2Q = T2O + T2P;
Chris@82 1221 T62 = T2P - T2O;
Chris@82 1222 }
Chris@82 1223 {
Chris@82 1224 E T2S, T2T, T2v, T2G;
Chris@82 1225 T2S = T1s - T1L;
Chris@82 1226 T2T = T1P + T1W;
Chris@82 1227 T2U = FMA(KP740951125, T2S, KP671558954 * T2T);
Chris@82 1228 T31 = FNMS(KP671558954, T2S, KP740951125 * T2T);
Chris@82 1229 T2v = T2b + T2u;
Chris@82 1230 T2G = T2y - T2F;
Chris@82 1231 T2H = FNMS(KP049067674, T2G, KP998795456 * T2v);
Chris@82 1232 T2K = FMA(KP049067674, T2v, KP998795456 * T2G);
Chris@82 1233 }
Chris@82 1234 {
Chris@82 1235 E T1f, T2I, T6b, T6c;
Chris@82 1236 T1f = Tx + T1e;
Chris@82 1237 T2I = T1Y + T2H;
Chris@82 1238 Cr[WS(csr, 31)] = T1f - T2I;
Chris@82 1239 Cr[0] = T1f + T2I;
Chris@82 1240 T6b = T2L + T2K;
Chris@82 1241 T6c = T62 + T69;
Chris@82 1242 Ci[WS(csi, 31)] = T6b - T6c;
Chris@82 1243 Ci[0] = T6b + T6c;
Chris@82 1244 }
Chris@82 1245 {
Chris@82 1246 E T2J, T2M, T61, T6a;
Chris@82 1247 T2J = Tx - T1e;
Chris@82 1248 T2M = T2K - T2L;
Chris@82 1249 Cr[WS(csr, 16)] = T2J - T2M;
Chris@82 1250 Cr[WS(csr, 15)] = T2J + T2M;
Chris@82 1251 T61 = T2H - T1Y;
Chris@82 1252 T6a = T62 - T69;
Chris@82 1253 Ci[WS(csi, 16)] = T61 - T6a;
Chris@82 1254 Ci[WS(csi, 15)] = T61 + T6a;
Chris@82 1255 }
Chris@82 1256 {
Chris@82 1257 E T2R, T2Y, T6h, T6i;
Chris@82 1258 T2R = T2N + T2Q;
Chris@82 1259 T2Y = T2U + T2X;
Chris@82 1260 Cr[WS(csr, 24)] = T2R - T2Y;
Chris@82 1261 Cr[WS(csr, 7)] = T2R + T2Y;
Chris@82 1262 T6h = T31 + T30;
Chris@82 1263 T6i = T6e + T6f;
Chris@82 1264 Ci[WS(csi, 24)] = T6h - T6i;
Chris@82 1265 Ci[WS(csi, 7)] = T6h + T6i;
Chris@82 1266 }
Chris@82 1267 {
Chris@82 1268 E T2Z, T32, T6d, T6g;
Chris@82 1269 T2Z = T2N - T2Q;
Chris@82 1270 T32 = T30 - T31;
Chris@82 1271 Cr[WS(csr, 23)] = T2Z - T32;
Chris@82 1272 Cr[WS(csr, 8)] = T2Z + T32;
Chris@82 1273 T6d = T2X - T2U;
Chris@82 1274 T6g = T6e - T6f;
Chris@82 1275 Ci[WS(csi, 23)] = T6d - T6g;
Chris@82 1276 Ci[WS(csi, 8)] = T6d + T6g;
Chris@82 1277 }
Chris@82 1278 }
Chris@82 1279 { /* stores Cr and Ci at positions 1, 6, 9, 14, 17, 22, 25, 30 */
Chris@82 1280 E T5j, T5L, T6R, T6X, T5q, T6W, T5V, T5Y, T5y, T5J, T5O, T6O, T5S, T5Z, T5F;
Chris@82 1281 E T5I, T5i, T6P;
Chris@82 1282 T5i = T5g - T5h;
Chris@82 1283 T5j = T5f - T5i;
Chris@82 1284 T5L = T5f + T5i;
Chris@82 1285 T6P = T3Z + T3W;
Chris@82 1286 T6R = T6P - T6Q;
Chris@82 1287 T6X = T6P + T6Q;
Chris@82 1288 {
Chris@82 1289 E T5m, T5p, T5T, T5U;
Chris@82 1290 T5m = FMA(KP290284677, T5k, KP956940335 * T5l);
Chris@82 1291 T5p = FNMS(KP290284677, T5o, KP956940335 * T5n);
Chris@82 1292 T5q = T5m - T5p;
Chris@82 1293 T6W = T5p + T5m;
Chris@82 1294 T5T = T5z + T5A;
Chris@82 1295 T5U = T5C + T5D;
Chris@82 1296 T5V = FNMS(KP146730474, T5U, KP989176509 * T5T);
Chris@82 1297 T5Y = FMA(KP146730474, T5T, KP989176509 * T5U);
Chris@82 1298 }
Chris@82 1299 {
Chris@82 1300 E T5u, T5x, T5M, T5N;
Chris@82 1301 T5u = T5s - T5t;
Chris@82 1302 T5x = T5v - T5w;
Chris@82 1303 T5y = FMA(KP803207531, T5u, KP595699304 * T5x);
Chris@82 1304 T5J = FNMS(KP595699304, T5u, KP803207531 * T5x);
Chris@82 1305 T5M = FMA(KP956940335, T5o, KP290284677 * T5n);
Chris@82 1306 T5N = FNMS(KP290284677, T5l, KP956940335 * T5k);
Chris@82 1307 T5O = T5M + T5N;
Chris@82 1308 T6O = T5N - T5M;
Chris@82 1309 }
Chris@82 1310 {
Chris@82 1311 E T5Q, T5R, T5B, T5E;
Chris@82 1312 T5Q = T5s + T5t;
Chris@82 1313 T5R = T5v + T5w;
Chris@82 1314 T5S = FMA(KP989176509, T5Q, KP146730474 * T5R);
Chris@82 1315 T5Z = FNMS(KP146730474, T5Q, KP989176509 * T5R);
Chris@82 1316 T5B = T5z - T5A;
Chris@82 1317 T5E = T5C - T5D;
Chris@82 1318 T5F = FNMS(KP595699304, T5E, KP803207531 * T5B);
Chris@82 1319 T5I = FMA(KP595699304, T5B, KP803207531 * T5E);
Chris@82 1320 }
Chris@82 1321 {
Chris@82 1322 E T5r, T5G, T6T, T6U;
Chris@82 1323 T5r = T5j + T5q;
Chris@82 1324 T5G = T5y + T5F;
Chris@82 1325 Cr[WS(csr, 25)] = T5r - T5G;
Chris@82 1326 Cr[WS(csr, 6)] = T5r + T5G;
Chris@82 1327 T6T = T5J + T5I;
Chris@82 1328 T6U = T6O + T6R;
Chris@82 1329 Ci[WS(csi, 25)] = T6T - T6U;
Chris@82 1330 Ci[WS(csi, 6)] = T6T + T6U;
Chris@82 1331 }
Chris@82 1332 {
Chris@82 1333 E T5H, T5K, T6N, T6S;
Chris@82 1334 T5H = T5j - T5q;
Chris@82 1335 T5K = T5I - T5J;
Chris@82 1336 Cr[WS(csr, 22)] = T5H - T5K;
Chris@82 1337 Cr[WS(csr, 9)] = T5H + T5K;
Chris@82 1338 T6N = T5F - T5y;
Chris@82 1339 T6S = T6O - T6R;
Chris@82 1340 Ci[WS(csi, 22)] = T6N - T6S;
Chris@82 1341 Ci[WS(csi, 9)] = T6N + T6S;
Chris@82 1342 }
Chris@82 1343 {
Chris@82 1344 E T5P, T5W, T6Z, T70;
Chris@82 1345 T5P = T5L + T5O;
Chris@82 1346 T5W = T5S + T5V;
Chris@82 1347 Cr[WS(csr, 30)] = T5P - T5W;
Chris@82 1348 Cr[WS(csr, 1)] = T5P + T5W;
Chris@82 1349 T6Z = T5Z + T5Y;
Chris@82 1350 T70 = T6W + T6X;
Chris@82 1351 Ci[WS(csi, 30)] = T6Z - T70;
Chris@82 1352 Ci[WS(csi, 1)] = T6Z + T70;
Chris@82 1353 }
Chris@82 1354 {
Chris@82 1355 E T5X, T60, T6V, T6Y;
Chris@82 1356 T5X = T5L - T5O;
Chris@82 1357 T60 = T5Y - T5Z;
Chris@82 1358 Cr[WS(csr, 17)] = T5X - T60;
Chris@82 1359 Cr[WS(csr, 14)] = T5X + T60;
Chris@82 1360 T6V = T5V - T5S;
Chris@82 1361 T6Y = T6W - T6X;
Chris@82 1362 Ci[WS(csi, 17)] = T6V - T6Y;
Chris@82 1363 Ci[WS(csi, 14)] = T6V + T6Y;
Chris@82 1364 }
Chris@82 1365 }
Chris@82 1366 { /* stores Cr and Ci at positions 3, 4, 11, 12, 19, 20, 27, 28 */
Chris@82 1367 E T37, T3z, T6n, T6t, T3e, T6s, T3J, T3M, T3m, T3x, T3C, T6k, T3G, T3N, T3t;
Chris@82 1368 E T3w, T36, T6l;
Chris@82 1369 T36 = T34 + T35;
Chris@82 1370 T37 = T33 - T36;
Chris@82 1371 T3z = T33 + T36;
Chris@82 1372 T6l = Tv - Tm;
Chris@82 1373 T6n = T6l - T6m;
Chris@82 1374 T6t = T6l + T6m;
Chris@82 1375 {
Chris@82 1376 E T3a, T3d, T3H, T3I;
Chris@82 1377 T3a = FMA(KP634393284, T38, KP773010453 * T39);
Chris@82 1378 T3d = FNMS(KP634393284, T3c, KP773010453 * T3b);
Chris@82 1379 T3e = T3a - T3d;
Chris@82 1380 T6s = T3d + T3a;
Chris@82 1381 T3H = T3n + T3o;
Chris@82 1382 T3I = T3q + T3r;
Chris@82 1383 T3J = FNMS(KP336889853, T3I, KP941544065 * T3H);
Chris@82 1384 T3M = FMA(KP336889853, T3H, KP941544065 * T3I);
Chris@82 1385 }
Chris@82 1386 {
Chris@82 1387 E T3i, T3l, T3A, T3B;
Chris@82 1388 T3i = T3g - T3h;
Chris@82 1389 T3l = T3j - T3k;
Chris@82 1390 T3m = FMA(KP903989293, T3i, KP427555093 * T3l);
Chris@82 1391 T3x = FNMS(KP427555093, T3i, KP903989293 * T3l);
Chris@82 1392 T3A = FMA(KP773010453, T3c, KP634393284 * T3b);
Chris@82 1393 T3B = FNMS(KP634393284, T39, KP773010453 * T38);
Chris@82 1394 T3C = T3A + T3B;
Chris@82 1395 T6k = T3B - T3A;
Chris@82 1396 }
Chris@82 1397 {
Chris@82 1398 E T3E, T3F, T3p, T3s;
Chris@82 1399 T3E = T3g + T3h;
Chris@82 1400 T3F = T3j + T3k;
Chris@82 1401 T3G = FMA(KP941544065, T3E, KP336889853 * T3F);
Chris@82 1402 T3N = FNMS(KP336889853, T3E, KP941544065 * T3F);
Chris@82 1403 T3p = T3n - T3o;
Chris@82 1404 T3s = T3q - T3r;
Chris@82 1405 T3t = FNMS(KP427555093, T3s, KP903989293 * T3p);
Chris@82 1406 T3w = FMA(KP427555093, T3p, KP903989293 * T3s);
Chris@82 1407 }
Chris@82 1408 {
Chris@82 1409 E T3f, T3u, T6p, T6q;
Chris@82 1410 T3f = T37 + T3e;
Chris@82 1411 T3u = T3m + T3t;
Chris@82 1412 Cr[WS(csr, 27)] = T3f - T3u;
Chris@82 1413 Cr[WS(csr, 4)] = T3f + T3u;
Chris@82 1414 T6p = T3x + T3w;
Chris@82 1415 T6q = T6k + T6n;
Chris@82 1416 Ci[WS(csi, 27)] = T6p - T6q;
Chris@82 1417 Ci[WS(csi, 4)] = T6p + T6q;
Chris@82 1418 }
Chris@82 1419 {
Chris@82 1420 E T3v, T3y, T6j, T6o;
Chris@82 1421 T3v = T37 - T3e;
Chris@82 1422 T3y = T3w - T3x;
Chris@82 1423 Cr[WS(csr, 20)] = T3v - T3y;
Chris@82 1424 Cr[WS(csr, 11)] = T3v + T3y;
Chris@82 1425 T6j = T3t - T3m;
Chris@82 1426 T6o = T6k - T6n;
Chris@82 1427 Ci[WS(csi, 20)] = T6j - T6o;
Chris@82 1428 Ci[WS(csi, 11)] = T6j + T6o;
Chris@82 1429 }
Chris@82 1430 {
Chris@82 1431 E T3D, T3K, T6v, T6w;
Chris@82 1432 T3D = T3z + T3C;
Chris@82 1433 T3K = T3G + T3J;
Chris@82 1434 Cr[WS(csr, 28)] = T3D - T3K;
Chris@82 1435 Cr[WS(csr, 3)] = T3D + T3K;
Chris@82 1436 T6v = T3N + T3M;
Chris@82 1437 T6w = T6s + T6t;
Chris@82 1438 Ci[WS(csi, 28)] = T6v - T6w;
Chris@82 1439 Ci[WS(csi, 3)] = T6v + T6w;
Chris@82 1440 }
Chris@82 1441 {
Chris@82 1442 E T3L, T3O, T6r, T6u;
Chris@82 1443 T3L = T3z - T3C;
Chris@82 1444 T3O = T3M - T3N;
Chris@82 1445 Cr[WS(csr, 19)] = T3L - T3O;
Chris@82 1446 Cr[WS(csr, 12)] = T3L + T3O;
Chris@82 1447 T6r = T3J - T3G;
Chris@82 1448 T6u = T6s - T6t;
Chris@82 1449 Ci[WS(csi, 19)] = T6r - T6u;
Chris@82 1450 Ci[WS(csi, 12)] = T6r + T6u;
Chris@82 1451 }
Chris@82 1452 }
Chris@82 1453 { /* stores Cr and Ci at positions 2, 5, 10, 13, 18, 21, 26, 29 */
Chris@82 1454 E T41, T4Z, T6D, T6J, T4g, T6I, T59, T5d, T4A, T4X, T52, T6y, T56, T5c, T4T;
Chris@82 1455 E T4W, T40, T6C;
Chris@82 1456 T40 = T3W - T3Z;
Chris@82 1457 T41 = T3T + T40;
Chris@82 1458 T4Z = T3T - T40;
Chris@82 1459 T6C = T5g + T5h;
Chris@82 1460 T6D = T6B - T6C;
Chris@82 1461 T6J = T6C + T6B;
Chris@82 1462 {
Chris@82 1463 E T48, T4f, T57, T58;
Chris@82 1464 T48 = FMA(KP881921264, T44, KP471396736 * T47);
Chris@82 1465 T4f = FMA(KP881921264, T4b, KP471396736 * T4e);
Chris@82 1466 T4g = T48 - T4f;
Chris@82 1467 T6I = T48 + T4f;
Chris@82 1468 T57 = T4K + T4H;
Chris@82 1469 T58 = T4R + T4O;
Chris@82 1470 T59 = FMA(KP514102744, T57, KP857728610 * T58);
Chris@82 1471 T5d = FNMS(KP857728610, T57, KP514102744 * T58);
Chris@82 1472 }
Chris@82 1473 {
Chris@82 1474 E T4s, T4z, T50, T51;
Chris@82 1475 T4s = T4k + T4r;
Chris@82 1476 T4z = T4v - T4y;
Chris@82 1477 T4A = FMA(KP970031253, T4s, KP242980179 * T4z);
Chris@82 1478 T4X = FNMS(KP242980179, T4s, KP970031253 * T4z);
Chris@82 1479 T50 = FNMS(KP471396736, T4b, KP881921264 * T4e);
Chris@82 1480 T51 = FNMS(KP471396736, T44, KP881921264 * T47);
Chris@82 1481 T52 = T50 - T51;
Chris@82 1482 T6y = T51 + T50;
Chris@82 1483 }
Chris@82 1484 {
Chris@82 1485 E T54, T55, T4L, T4S;
Chris@82 1486 T54 = T4k - T4r;
Chris@82 1487 T55 = T4y + T4v;
Chris@82 1488 T56 = FMA(KP514102744, T54, KP857728610 * T55);
Chris@82 1489 T5c = FNMS(KP514102744, T55, KP857728610 * T54);
Chris@82 1490 T4L = T4H - T4K;
Chris@82 1491 T4S = T4O - T4R;
Chris@82 1492 T4T = FNMS(KP242980179, T4S, KP970031253 * T4L);
Chris@82 1493 T4W = FMA(KP242980179, T4L, KP970031253 * T4S);
Chris@82 1494 }
Chris@82 1495 {
Chris@82 1496 E T4h, T4U, T6F, T6G;
Chris@82 1497 T4h = T41 + T4g;
Chris@82 1498 T4U = T4A + T4T;
Chris@82 1499 Cr[WS(csr, 29)] = T4h - T4U;
Chris@82 1500 Cr[WS(csr, 2)] = T4h + T4U;
Chris@82 1501 T6F = T4X + T4W;
Chris@82 1502 T6G = T6y + T6D;
Chris@82 1503 Ci[WS(csi, 29)] = T6F - T6G;
Chris@82 1504 Ci[WS(csi, 2)] = T6F + T6G;
Chris@82 1505 }
Chris@82 1506 {
Chris@82 1507 E T4V, T4Y, T6x, T6E;
Chris@82 1508 T4V = T41 - T4g;
Chris@82 1509 T4Y = T4W - T4X;
Chris@82 1510 Cr[WS(csr, 18)] = T4V - T4Y;
Chris@82 1511 Cr[WS(csr, 13)] = T4V + T4Y;
Chris@82 1512 T6x = T4T - T4A;
Chris@82 1513 T6E = T6y - T6D;
Chris@82 1514 Ci[WS(csi, 18)] = T6x - T6E;
Chris@82 1515 Ci[WS(csi, 13)] = T6x + T6E;
Chris@82 1516 }
Chris@82 1517 {
Chris@82 1518 E T53, T5a, T6L, T6M;
Chris@82 1519 T53 = T4Z - T52;
Chris@82 1520 T5a = T56 - T59;
Chris@82 1521 Cr[WS(csr, 21)] = T53 - T5a;
Chris@82 1522 Cr[WS(csr, 10)] = T53 + T5a;
Chris@82 1523 T6L = T5d - T5c;
Chris@82 1524 T6M = T6J - T6I;
Chris@82 1525 Ci[WS(csi, 21)] = T6L - T6M;
Chris@82 1526 Ci[WS(csi, 10)] = T6L + T6M;
Chris@82 1527 }
Chris@82 1528 {
Chris@82 1529 E T5b, T5e, T6H, T6K;
Chris@82 1530 T5b = T4Z + T52;
Chris@82 1531 T5e = T5c + T5d;
Chris@82 1532 Cr[WS(csr, 26)] = T5b - T5e;
Chris@82 1533 Cr[WS(csr, 5)] = T5b + T5e;
Chris@82 1534 T6H = T56 + T59;
Chris@82 1535 T6K = T6I + T6J;
Chris@82 1536 Ci[WS(csi, 5)] = -(T6H + T6K);
Chris@82 1537 Ci[WS(csi, 26)] = T6K - T6H;
Chris@82 1538 }
Chris@82 1539 }
Chris@82 1540 }
Chris@82 1541 }
Chris@82 1542 }
Chris@82 1543
/*
 * Descriptor handed to X(kr2c_register) together with r2cfII_64 in the
 * non-FMA build of this file.  The inner {342, 114, 92, 0} repeats the
 * operation counts quoted in this variant's generator comment (342 additions,
 * 114 multiplications, 92 fused multiply/adds).
 * NOTE(review): the meaning of the leading 64, of the fourth count and of
 * GENUS comes from headers not shown here -- confirm there.
 */
Chris@82 1544 static const kr2c_desc desc = { 64, "r2cfII_64", {342, 114, 92, 0}, &GENUS };
Chris@82 1545
/*
 * Registration entry point (non-FMA build): passes planner p, the r2cfII_64
 * kernel defined above and its descriptor to X(kr2c_register).
 */
Chris@82 1546 void X(codelet_r2cfII_64) (planner *p) {
Chris@82 1547 X(kr2c_register) (p, r2cfII_64, &desc);
Chris@82 1548 }
Chris@82 1549
Chris@82 1550 #endif