annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cfII_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:47:32 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cfII_64 -dft-II -include r2cfII.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 434 FP additions, 320 FP multiplications,
Chris@42 32 * (or, 114 additions, 0 multiplications, 320 fused multiply/add),
Chris@42 33 * 158 stack variables, 31 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cfII.h"
Chris@42 36
/*
 * r2cfII_64 (HAVE_FMA variant): straight-line codelet emitted by genfft; the
 * "Generated by" comment above records the generator invocation
 * (gen_r2cf, -n 64, -dft-II, -fma).  The arithmetic is machine-scheduled;
 * regenerate it rather than editing statements by hand.
 *
 * R0, R1       input arrays; every pass reads R0[WS(rs, k)] and
 *              R1[WS(rs, k)] for each k in 0..31 (64 loads).
 * Cr, Ci       output arrays; every pass writes Cr[WS(csr, k)] and
 *              Ci[WS(csi, k)] for each k in 0..31 (64 stores).
 * rs, csr, csi strides handed to WS() when indexing the inputs, Cr and Ci.
 * v            number of passes of the outer loop.
 * ivs, ovs     amounts added to (R0, R1) and to (Cr, Ci) after each pass.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file (presumably they come in
 * through codelet-rdft.h / r2cfII.h).  FMA(a, b, c), FMS(a, b, c) and
 * FNMS(a, b, c) presumably expand to a*b + c, a*b - c and c - a*b -- confirm
 * against the FFTW headers.  R0/R1 presumably hold the even/odd input samples
 * and Cr/Ci the real/imaginary output parts -- confirm.
 */
Chris@42 37 static void r2cfII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Numeric constants: each KPddddddddd identifier spells the leading decimal
   digits of the value it is bound to. */
Chris@42 39 DK(KP941544065, +0.941544065183020778412509402599502357185589796);
Chris@42 40 DK(KP903989293, +0.903989293123443331586200297230537048710132025);
Chris@42 41 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 42 DK(KP472964775, +0.472964775891319928124438237972992463904131113);
Chris@42 43 DK(KP357805721, +0.357805721314524104672487743774474392487532769);
Chris@42 44 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@42 45 DK(KP989176509, +0.989176509964780973451673738016243063983689533);
Chris@42 46 DK(KP803207531, +0.803207531480644909806676512963141923879569427);
Chris@42 47 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 48 DK(KP741650546, +0.741650546272035369581266691172079863842265220);
Chris@42 49 DK(KP148335987, +0.148335987538347428753676511486911367000625355);
Chris@42 50 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@42 51 DK(KP998795456, +0.998795456205172392714771604759100694443203615);
Chris@42 52 DK(KP740951125, +0.740951125354959091175616897495162729728955309);
Chris@42 53 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 54 DK(KP906347169, +0.906347169019147157946142717268914412664134293);
Chris@42 55 DK(KP049126849, +0.049126849769467254105343321271313617079695752);
Chris@42 56 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@42 57 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 58 DK(KP970031253, +0.970031253194543992603984207286100251456865962);
Chris@42 59 DK(KP857728610, +0.857728610000272069902269984284770137042490799);
Chris@42 60 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 61 DK(KP599376933, +0.599376933681923766271389869014404232837890546);
Chris@42 62 DK(KP250486960, +0.250486960191305461595702160124721208578685568);
Chris@42 63 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@42 64 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 65 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 66 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 67 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 68 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 69 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 70 {
Chris@42 71 INT i;
/* One pass per count in v: after each pass R0/R1 advance by ivs and Cr/Ci by
   ovs.  NOTE(review): the MAKE_VOLATILE_STRIDE(...) terms in the increment
   clause are macros defined elsewhere; their effect is not visible here. */
Chris@42 72 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* These four are declared at loop scope because they are assigned deep inside
   the nested blocks but only consumed by the last four stores of the pass. */
Chris@42 73 E T5b, T6q, T6p, T5e;
Chris@42 74 {
/* Intermediates produced by the input stage below and consumed by the output
   stage that follows it. */
Chris@42 75 E T5h, T3Z, T35, Tm, T5g, T3W, T34, Tv, T5f, T3T, T6N, T6z, T6j, T65, T33;
Chris@42 76 E Td, T5z, T4D, T3q, T2C, T5C, T4O, T3n, T2b, T5k, T4b, T3c, TR, T5l, T4e;
Chris@42 77 E T3b, TK, T5n, T44, T39, T1c, T5o, T47, T38, T15, T5s, T4k, T3j, T1T, T5v;
Chris@42 78 E T4v, T3g, T1s, T1t, T1y, T5D, T4K, T5A, T4R, T3o, T2F, T3r, T2u, T1C, T1H;
Chris@42 79 E T1D, T1z, T1w, T1E;
/* Input stage: every load from R0 and R1 happens inside this block,
   interleaved with the first arithmetic on the loaded values; nothing is
   stored to Cr or Ci here. */
Chris@42 80 {
Chris@42 81 E T2A, T26, T4B, T23, T4M, T2y, T2z, T29;
Chris@42 82 {
Chris@42 83 E Te, Tj, Tn, Ts, To, Tk, Th, Tp, Tf, Tg;
Chris@42 84 Te = R0[WS(rs, 14)];
Chris@42 85 Tj = R0[WS(rs, 30)];
Chris@42 86 Tf = R0[WS(rs, 6)];
Chris@42 87 Tg = R0[WS(rs, 22)];
Chris@42 88 Tn = R0[WS(rs, 18)];
Chris@42 89 Ts = R0[WS(rs, 2)];
Chris@42 90 To = R0[WS(rs, 10)];
Chris@42 91 Tk = Tg - Tf;
Chris@42 92 Th = Tf + Tg;
Chris@42 93 Tp = R0[WS(rs, 26)];
Chris@42 94 {
Chris@42 95 E T3Q, T8, T3P, T5, T6x, T63, T3R, Tb;
Chris@42 96 {
Chris@42 97 E T1, T61, T9, T62, T4, Ta;
Chris@42 98 {
Chris@42 99 E T3V, Tu, T3U, Tr, T3Y, Tl;
Chris@42 100 T1 = R0[0];
Chris@42 101 T3Y = FMA(KP707106781, Tk, Tj);
Chris@42 102 Tl = FNMS(KP707106781, Tk, Tj);
Chris@42 103 {
Chris@42 104 E T3X, Ti, Tt, Tq;
Chris@42 105 T3X = FMA(KP707106781, Th, Te);
Chris@42 106 Ti = FNMS(KP707106781, Th, Te);
Chris@42 107 Tt = To - Tp;
Chris@42 108 Tq = To + Tp;
Chris@42 109 T5h = FNMS(KP198912367, T3X, T3Y);
Chris@42 110 T3Z = FMA(KP198912367, T3Y, T3X);
Chris@42 111 T35 = FMA(KP668178637, Ti, Tl);
Chris@42 112 Tm = FNMS(KP668178637, Tl, Ti);
Chris@42 113 T3V = FMA(KP707106781, Tt, Ts);
Chris@42 114 Tu = FNMS(KP707106781, Tt, Ts);
Chris@42 115 T3U = FMA(KP707106781, Tq, Tn);
Chris@42 116 Tr = FNMS(KP707106781, Tq, Tn);
Chris@42 117 T61 = R0[WS(rs, 16)];
Chris@42 118 }
Chris@42 119 {
Chris@42 120 E T2, T3, T6, T7;
Chris@42 121 T2 = R0[WS(rs, 8)];
Chris@42 122 T5g = FNMS(KP198912367, T3U, T3V);
Chris@42 123 T3W = FMA(KP198912367, T3V, T3U);
Chris@42 124 T34 = FMA(KP668178637, Tr, Tu);
Chris@42 125 Tv = FNMS(KP668178637, Tu, Tr);
Chris@42 126 T3 = R0[WS(rs, 24)];
Chris@42 127 T6 = R0[WS(rs, 20)];
Chris@42 128 T7 = R0[WS(rs, 4)];
Chris@42 129 T9 = R0[WS(rs, 12)];
Chris@42 130 T62 = T2 + T3;
Chris@42 131 T4 = T2 - T3;
Chris@42 132 T3Q = FNMS(KP414213562, T6, T7);
Chris@42 133 T8 = FMA(KP414213562, T7, T6);
Chris@42 134 Ta = R0[WS(rs, 28)];
Chris@42 135 }
Chris@42 136 }
Chris@42 137 T3P = FMA(KP707106781, T4, T1);
Chris@42 138 T5 = FNMS(KP707106781, T4, T1);
Chris@42 139 T6x = FNMS(KP707106781, T62, T61);
Chris@42 140 T63 = FMA(KP707106781, T62, T61);
Chris@42 141 T3R = FMS(KP414213562, T9, Ta);
Chris@42 142 Tb = FMA(KP414213562, Ta, T9);
Chris@42 143 }
Chris@42 144 {
Chris@42 145 E T1Z, T2w, T27, T2x, T22, T28;
Chris@42 146 T1Z = R1[WS(rs, 31)];
Chris@42 147 {
Chris@42 148 E T3S, T6y, T64, Tc;
Chris@42 149 T3S = T3Q + T3R;
Chris@42 150 T6y = T3R - T3Q;
Chris@42 151 T64 = T8 + Tb;
Chris@42 152 Tc = T8 - Tb;
Chris@42 153 T5f = FMA(KP923879532, T3S, T3P);
Chris@42 154 T3T = FNMS(KP923879532, T3S, T3P);
Chris@42 155 T6N = FNMS(KP923879532, T6y, T6x);
Chris@42 156 T6z = FMA(KP923879532, T6y, T6x);
Chris@42 157 T6j = FNMS(KP923879532, T64, T63);
Chris@42 158 T65 = FMA(KP923879532, T64, T63);
Chris@42 159 T33 = FMA(KP923879532, Tc, T5);
Chris@42 160 Td = FNMS(KP923879532, Tc, T5);
Chris@42 161 T2w = R1[WS(rs, 15)];
Chris@42 162 }
Chris@42 163 {
Chris@42 164 E T20, T21, T24, T25;
Chris@42 165 T20 = R1[WS(rs, 7)];
Chris@42 166 T21 = R1[WS(rs, 23)];
Chris@42 167 T24 = R1[WS(rs, 19)];
Chris@42 168 T25 = R1[WS(rs, 3)];
Chris@42 169 T27 = R1[WS(rs, 11)];
Chris@42 170 T2x = T20 + T21;
Chris@42 171 T22 = T20 - T21;
Chris@42 172 T2A = FNMS(KP414213562, T24, T25);
Chris@42 173 T26 = FMA(KP414213562, T25, T24);
Chris@42 174 T28 = R1[WS(rs, 27)];
Chris@42 175 }
Chris@42 176 T4B = FMS(KP707106781, T22, T1Z);
Chris@42 177 T23 = FMA(KP707106781, T22, T1Z);
Chris@42 178 T4M = FMA(KP707106781, T2x, T2w);
Chris@42 179 T2y = FNMS(KP707106781, T2x, T2w);
Chris@42 180 T2z = FMS(KP414213562, T27, T28);
Chris@42 181 T29 = FMA(KP414213562, T28, T27);
Chris@42 182 }
Chris@42 183 }
Chris@42 184 }
Chris@42 185 {
Chris@42 186 E T1a, T10, T42, TX, T45, T18, T19, T13;
Chris@42 187 {
Chris@42 188 E TP, TF, T49, TC, T4c, TN, TO, TI;
Chris@42 189 {
Chris@42 190 E Ty, TL, TG, TM, TB, TH;
Chris@42 191 Ty = R0[WS(rs, 17)];
Chris@42 192 {
Chris@42 193 E T4C, T2B, T4N, T2a;
Chris@42 194 T4C = T2A + T2z;
Chris@42 195 T2B = T2z - T2A;
Chris@42 196 T4N = T26 + T29;
Chris@42 197 T2a = T26 - T29;
Chris@42 198 T5z = FMA(KP923879532, T4C, T4B);
Chris@42 199 T4D = FNMS(KP923879532, T4C, T4B);
Chris@42 200 T3q = FMA(KP923879532, T2B, T2y);
Chris@42 201 T2C = FNMS(KP923879532, T2B, T2y);
Chris@42 202 T5C = FMA(KP923879532, T4N, T4M);
Chris@42 203 T4O = FNMS(KP923879532, T4N, T4M);
Chris@42 204 T3n = FNMS(KP923879532, T2a, T23);
Chris@42 205 T2b = FMA(KP923879532, T2a, T23);
Chris@42 206 TL = R0[WS(rs, 1)];
Chris@42 207 }
Chris@42 208 {
Chris@42 209 E Tz, TA, TD, TE;
Chris@42 210 Tz = R0[WS(rs, 9)];
Chris@42 211 TA = R0[WS(rs, 25)];
Chris@42 212 TD = R0[WS(rs, 29)];
Chris@42 213 TE = R0[WS(rs, 13)];
Chris@42 214 TG = R0[WS(rs, 5)];
Chris@42 215 TM = Tz - TA;
Chris@42 216 TB = Tz + TA;
Chris@42 217 TP = FMA(KP414213562, TD, TE);
Chris@42 218 TF = FMS(KP414213562, TE, TD);
Chris@42 219 TH = R0[WS(rs, 21)];
Chris@42 220 }
Chris@42 221 T49 = FMA(KP707106781, TB, Ty);
Chris@42 222 TC = FNMS(KP707106781, TB, Ty);
Chris@42 223 T4c = FMA(KP707106781, TM, TL);
Chris@42 224 TN = FNMS(KP707106781, TM, TL);
Chris@42 225 TO = FMA(KP414213562, TG, TH);
Chris@42 226 TI = FNMS(KP414213562, TH, TG);
Chris@42 227 }
Chris@42 228 {
Chris@42 229 E TT, T16, T11, T17, TW, T12;
Chris@42 230 TT = R0[WS(rs, 15)];
Chris@42 231 {
Chris@42 232 E T4a, TQ, T4d, TJ;
Chris@42 233 T4a = TO + TP;
Chris@42 234 TQ = TO - TP;
Chris@42 235 T4d = TI + TF;
Chris@42 236 TJ = TF - TI;
Chris@42 237 T5k = FMA(KP923879532, T4a, T49);
Chris@42 238 T4b = FNMS(KP923879532, T4a, T49);
Chris@42 239 T3c = FMA(KP923879532, TQ, TN);
Chris@42 240 TR = FNMS(KP923879532, TQ, TN);
Chris@42 241 T5l = FMA(KP923879532, T4d, T4c);
Chris@42 242 T4e = FNMS(KP923879532, T4d, T4c);
Chris@42 243 T3b = FMA(KP923879532, TJ, TC);
Chris@42 244 TK = FNMS(KP923879532, TJ, TC);
Chris@42 245 T16 = R0[WS(rs, 31)];
Chris@42 246 }
Chris@42 247 {
Chris@42 248 E TU, TV, TY, TZ;
Chris@42 249 TU = R0[WS(rs, 7)];
Chris@42 250 TV = R0[WS(rs, 23)];
Chris@42 251 TY = R0[WS(rs, 3)];
Chris@42 252 TZ = R0[WS(rs, 19)];
Chris@42 253 T11 = R0[WS(rs, 27)];
Chris@42 254 T17 = TV - TU;
Chris@42 255 TW = TU + TV;
Chris@42 256 T1a = FMA(KP414213562, TY, TZ);
Chris@42 257 T10 = FMS(KP414213562, TZ, TY);
Chris@42 258 T12 = R0[WS(rs, 11)];
Chris@42 259 }
Chris@42 260 T42 = FMA(KP707106781, TW, TT);
Chris@42 261 TX = FNMS(KP707106781, TW, TT);
Chris@42 262 T45 = FMA(KP707106781, T17, T16);
Chris@42 263 T18 = FNMS(KP707106781, T17, T16);
Chris@42 264 T19 = FMA(KP414213562, T11, T12);
Chris@42 265 T13 = FNMS(KP414213562, T12, T11);
Chris@42 266 }
Chris@42 267 }
Chris@42 268 {
Chris@42 269 E T1R, T1n, T4i, T1k, T4t, T1P, T1Q, T1q;
Chris@42 270 {
Chris@42 271 E T1g, T1N, T1o, T1O, T1j, T1p;
Chris@42 272 T1g = R1[0];
Chris@42 273 {
Chris@42 274 E T43, T1b, T46, T14;
Chris@42 275 T43 = T1a + T19;
Chris@42 276 T1b = T19 - T1a;
Chris@42 277 T46 = T10 + T13;
Chris@42 278 T14 = T10 - T13;
Chris@42 279 T5n = FMA(KP923879532, T43, T42);
Chris@42 280 T44 = FNMS(KP923879532, T43, T42);
Chris@42 281 T39 = FMA(KP923879532, T1b, T18);
Chris@42 282 T1c = FNMS(KP923879532, T1b, T18);
Chris@42 283 T5o = FMA(KP923879532, T46, T45);
Chris@42 284 T47 = FNMS(KP923879532, T46, T45);
Chris@42 285 T38 = FMA(KP923879532, T14, TX);
Chris@42 286 T15 = FNMS(KP923879532, T14, TX);
Chris@42 287 T1N = R1[WS(rs, 16)];
Chris@42 288 }
Chris@42 289 {
Chris@42 290 E T1h, T1i, T1l, T1m;
Chris@42 291 T1h = R1[WS(rs, 8)];
Chris@42 292 T1i = R1[WS(rs, 24)];
Chris@42 293 T1l = R1[WS(rs, 20)];
Chris@42 294 T1m = R1[WS(rs, 4)];
Chris@42 295 T1o = R1[WS(rs, 12)];
Chris@42 296 T1O = T1h + T1i;
Chris@42 297 T1j = T1h - T1i;
Chris@42 298 T1R = FNMS(KP414213562, T1l, T1m);
Chris@42 299 T1n = FMA(KP414213562, T1m, T1l);
Chris@42 300 T1p = R1[WS(rs, 28)];
Chris@42 301 }
Chris@42 302 T4i = FMA(KP707106781, T1j, T1g);
Chris@42 303 T1k = FNMS(KP707106781, T1j, T1g);
Chris@42 304 T4t = FMA(KP707106781, T1O, T1N);
Chris@42 305 T1P = FNMS(KP707106781, T1O, T1N);
Chris@42 306 T1Q = FMS(KP414213562, T1o, T1p);
Chris@42 307 T1q = FMA(KP414213562, T1p, T1o);
Chris@42 308 }
Chris@42 309 {
Chris@42 310 E T2c, T2h, T2l, T2q, T2m, T2i, T2f, T2n, T2d, T2e;
Chris@42 311 T2c = R1[WS(rs, 13)];
Chris@42 312 {
Chris@42 313 E T4j, T1S, T4u, T1r;
Chris@42 314 T4j = T1R + T1Q;
Chris@42 315 T1S = T1Q - T1R;
Chris@42 316 T4u = T1n + T1q;
Chris@42 317 T1r = T1n - T1q;
Chris@42 318 T5s = FMA(KP923879532, T4j, T4i);
Chris@42 319 T4k = FNMS(KP923879532, T4j, T4i);
Chris@42 320 T3j = FMA(KP923879532, T1S, T1P);
Chris@42 321 T1T = FNMS(KP923879532, T1S, T1P);
Chris@42 322 T5v = FMA(KP923879532, T4u, T4t);
Chris@42 323 T4v = FNMS(KP923879532, T4u, T4t);
Chris@42 324 T3g = FMA(KP923879532, T1r, T1k);
Chris@42 325 T1s = FNMS(KP923879532, T1r, T1k);
Chris@42 326 T2h = R1[WS(rs, 29)];
Chris@42 327 T2d = R1[WS(rs, 5)];
Chris@42 328 T2e = R1[WS(rs, 21)];
Chris@42 329 }
Chris@42 330 T2l = R1[WS(rs, 17)];
Chris@42 331 T2q = R1[WS(rs, 1)];
Chris@42 332 T2m = R1[WS(rs, 9)];
Chris@42 333 T2i = T2d - T2e;
Chris@42 334 T2f = T2d + T2e;
Chris@42 335 T2n = R1[WS(rs, 25)];
Chris@42 336 {
Chris@42 337 E T1u, T1v, T2j, T4I;
Chris@42 338 T1t = R1[WS(rs, 14)];
Chris@42 339 T2j = FMA(KP707106781, T2i, T2h);
Chris@42 340 T4I = FMS(KP707106781, T2i, T2h);
Chris@42 341 {
Chris@42 342 E T4H, T2g, T2r, T2o;
Chris@42 343 T4H = FMA(KP707106781, T2f, T2c);
Chris@42 344 T2g = FNMS(KP707106781, T2f, T2c);
Chris@42 345 T2r = T2m - T2n;
Chris@42 346 T2o = T2m + T2n;
Chris@42 347 {
Chris@42 348 E T4J, T4P, T2E, T2k;
Chris@42 349 T4J = FNMS(KP198912367, T4I, T4H);
Chris@42 350 T4P = FMA(KP198912367, T4H, T4I);
Chris@42 351 T2E = FMA(KP668178637, T2g, T2j);
Chris@42 352 T2k = FNMS(KP668178637, T2j, T2g);
Chris@42 353 {
Chris@42 354 E T2s, T4F, T4E, T2p;
Chris@42 355 T2s = FNMS(KP707106781, T2r, T2q);
Chris@42 356 T4F = FMA(KP707106781, T2r, T2q);
Chris@42 357 T4E = FMA(KP707106781, T2o, T2l);
Chris@42 358 T2p = FNMS(KP707106781, T2o, T2l);
Chris@42 359 T1y = R1[WS(rs, 30)];
Chris@42 360 T1u = R1[WS(rs, 6)];
Chris@42 361 {
Chris@42 362 E T4G, T4Q, T2D, T2t;
Chris@42 363 T4G = FMA(KP198912367, T4F, T4E);
Chris@42 364 T4Q = FNMS(KP198912367, T4E, T4F);
Chris@42 365 T2D = FMA(KP668178637, T2p, T2s);
Chris@42 366 T2t = FNMS(KP668178637, T2s, T2p);
Chris@42 367 T5D = T4G + T4J;
Chris@42 368 T4K = T4G - T4J;
Chris@42 369 T5A = T4Q + T4P;
Chris@42 370 T4R = T4P - T4Q;
Chris@42 371 T3o = T2D - T2E;
Chris@42 372 T2F = T2D + T2E;
Chris@42 373 T3r = T2t + T2k;
Chris@42 374 T2u = T2k - T2t;
Chris@42 375 T1v = R1[WS(rs, 22)];
Chris@42 376 }
Chris@42 377 }
Chris@42 378 }
Chris@42 379 }
Chris@42 380 T1C = R1[WS(rs, 18)];
Chris@42 381 T1H = R1[WS(rs, 2)];
Chris@42 382 T1D = R1[WS(rs, 10)];
Chris@42 383 T1z = T1u - T1v;
Chris@42 384 T1w = T1u + T1v;
Chris@42 385 T1E = R1[WS(rs, 26)];
Chris@42 386 }
Chris@42 387 }
Chris@42 388 }
Chris@42 389 }
Chris@42 390 }
/* Output stage: no further loads from R0/R1; the intermediates are combined
   and all but the last four results are stored to Cr/Ci inside this block. */
Chris@42 391 {
Chris@42 392 E T6A, T4r, T4y, T3h, T3k, T36, T6k, T40, T5X, T6c, T6b, T60;
Chris@42 393 {
Chris@42 394 E T5w, T5t, T2Z, T6U, T6T, T32;
Chris@42 395 {
Chris@42 396 E Tx, T2N, T2v, T6V, T6P, T6Q, T1e, T2G, T31, T2X, T2L, T1Y, T6W, T2Q, T30;
Chris@42 397 E T2U;
Chris@42 398 {
Chris@42 399 E T1W, T1L, T2O, T2P, T2V, T2W, T6O, TS, T1d;
Chris@42 400 {
Chris@42 401 E T4q, T4w, T1V, T1B, T1J, T4m, T4l, T1G, Tw, T1A, T4p;
Chris@42 402 T6A = Tv + Tm;
Chris@42 403 Tw = Tm - Tv;
Chris@42 404 T1A = FMA(KP707106781, T1z, T1y);
Chris@42 405 T4p = FMS(KP707106781, T1z, T1y);
Chris@42 406 {
Chris@42 407 E T4o, T1x, T1I, T1F;
Chris@42 408 T4o = FMA(KP707106781, T1w, T1t);
Chris@42 409 T1x = FNMS(KP707106781, T1w, T1t);
Chris@42 410 T1I = T1D - T1E;
Chris@42 411 T1F = T1D + T1E;
Chris@42 412 T4q = FNMS(KP198912367, T4p, T4o);
Chris@42 413 T4w = FMA(KP198912367, T4o, T4p);
Chris@42 414 T1V = FMA(KP668178637, T1x, T1A);
Chris@42 415 T1B = FNMS(KP668178637, T1A, T1x);
Chris@42 416 T1J = FNMS(KP707106781, T1I, T1H);
Chris@42 417 T4m = FMA(KP707106781, T1I, T1H);
Chris@42 418 T4l = FMA(KP707106781, T1F, T1C);
Chris@42 419 T1G = FNMS(KP707106781, T1F, T1C);
Chris@42 420 Tx = FNMS(KP831469612, Tw, Td);
Chris@42 421 T2N = FMA(KP831469612, Tw, Td);
Chris@42 422 }
Chris@42 423 {
Chris@42 424 E T4n, T4x, T1U, T1K;
Chris@42 425 T4n = FMA(KP198912367, T4m, T4l);
Chris@42 426 T4x = FNMS(KP198912367, T4l, T4m);
Chris@42 427 T1U = FMA(KP668178637, T1G, T1J);
Chris@42 428 T1K = FNMS(KP668178637, T1J, T1G);
Chris@42 429 T5w = T4n + T4q;
Chris@42 430 T4r = T4n - T4q;
Chris@42 431 T5t = T4x + T4w;
Chris@42 432 T4y = T4w - T4x;
Chris@42 433 T3h = T1U - T1V;
Chris@42 434 T1W = T1U + T1V;
Chris@42 435 T3k = T1K + T1B;
Chris@42 436 T1L = T1B - T1K;
Chris@42 437 T6O = T34 + T35;
Chris@42 438 T36 = T34 - T35;
Chris@42 439 }
Chris@42 440 }
Chris@42 441 T2O = FNMS(KP534511135, TK, TR);
Chris@42 442 TS = FMA(KP534511135, TR, TK);
Chris@42 443 T1d = FMA(KP534511135, T1c, T15);
Chris@42 444 T2P = FNMS(KP534511135, T15, T1c);
Chris@42 445 T2v = FMA(KP831469612, T2u, T2b);
Chris@42 446 T2V = FNMS(KP831469612, T2u, T2b);
Chris@42 447 T6V = FNMS(KP831469612, T6O, T6N);
Chris@42 448 T6P = FMA(KP831469612, T6O, T6N);
Chris@42 449 T6Q = TS + T1d;
Chris@42 450 T1e = TS - T1d;
Chris@42 451 T2W = FMA(KP831469612, T2F, T2C);
Chris@42 452 T2G = FNMS(KP831469612, T2F, T2C);
Chris@42 453 {
Chris@42 454 E T2S, T2T, T1M, T1X;
Chris@42 455 T2S = FMA(KP831469612, T1L, T1s);
Chris@42 456 T1M = FNMS(KP831469612, T1L, T1s);
Chris@42 457 T1X = FNMS(KP831469612, T1W, T1T);
Chris@42 458 T2T = FMA(KP831469612, T1W, T1T);
Chris@42 459 T31 = FMA(KP250486960, T2V, T2W);
Chris@42 460 T2X = FNMS(KP250486960, T2W, T2V);
Chris@42 461 T2L = FNMS(KP599376933, T1M, T1X);
Chris@42 462 T1Y = FMA(KP599376933, T1X, T1M);
Chris@42 463 T6W = T2O + T2P;
Chris@42 464 T2Q = T2O - T2P;
Chris@42 465 T30 = FMA(KP250486960, T2S, T2T);
Chris@42 466 T2U = FNMS(KP250486960, T2T, T2S);
Chris@42 467 }
Chris@42 468 }
Chris@42 469 {
Chris@42 470 E T2J, T1f, T6X, T6Z, T2K, T2H;
Chris@42 471 T2J = FNMS(KP881921264, T1e, Tx);
Chris@42 472 T1f = FMA(KP881921264, T1e, Tx);
Chris@42 473 T6X = FNMS(KP881921264, T6W, T6V);
Chris@42 474 T6Z = FMA(KP881921264, T6W, T6V);
Chris@42 475 T2K = FNMS(KP599376933, T2v, T2G);
Chris@42 476 T2H = FMA(KP599376933, T2G, T2v);
Chris@42 477 {
Chris@42 478 E T2R, T2Y, T6R, T6S;
Chris@42 479 T2Z = FNMS(KP881921264, T2Q, T2N);
Chris@42 480 T2R = FMA(KP881921264, T2Q, T2N);
Chris@42 481 {
Chris@42 482 E T2M, T6Y, T70, T2I;
Chris@42 483 T2M = T2K - T2L;
Chris@42 484 T6Y = T2L + T2K;
Chris@42 485 T70 = T1Y + T2H;
Chris@42 486 T2I = T1Y - T2H;
Chris@42 487 Cr[WS(csr, 10)] = FMA(KP857728610, T2M, T2J);
Chris@42 488 Cr[WS(csr, 21)] = FNMS(KP857728610, T2M, T2J);
Chris@42 489 Ci[WS(csi, 5)] = FMA(KP857728610, T6Y, T6X);
Chris@42 490 Ci[WS(csi, 26)] = FMS(KP857728610, T6Y, T6X);
Chris@42 491 Ci[WS(csi, 21)] = FNMS(KP857728610, T70, T6Z);
Chris@42 492 Ci[WS(csi, 10)] = -(FMA(KP857728610, T70, T6Z));
Chris@42 493 Cr[WS(csr, 5)] = FMA(KP857728610, T2I, T1f);
Chris@42 494 Cr[WS(csr, 26)] = FNMS(KP857728610, T2I, T1f);
Chris@42 495 T2Y = T2U - T2X;
Chris@42 496 T6U = T2U + T2X;
Chris@42 497 }
Chris@42 498 T6T = FNMS(KP881921264, T6Q, T6P);
Chris@42 499 T6R = FMA(KP881921264, T6Q, T6P);
Chris@42 500 T6S = T30 + T31;
Chris@42 501 T32 = T30 - T31;
Chris@42 502 Cr[WS(csr, 2)] = FMA(KP970031253, T2Y, T2R);
Chris@42 503 Cr[WS(csr, 29)] = FNMS(KP970031253, T2Y, T2R);
Chris@42 504 Ci[WS(csi, 29)] = FNMS(KP970031253, T6S, T6R);
Chris@42 505 Ci[WS(csi, 2)] = -(FMA(KP970031253, T6S, T6R));
Chris@42 506 }
Chris@42 507 }
Chris@42 508 }
Chris@42 509 {
Chris@42 510 E T5j, T5L, T5B, T6d, T67, T68, T5q, T5E, T5Z, T5V, T5J, T5y, T6e, T5O, T5Y;
Chris@42 511 E T5S;
Chris@42 512 {
Chris@42 513 E T5M, T5N, T5T, T5U;
Chris@42 514 {
Chris@42 515 E T66, T5i, T5m, T5p;
Chris@42 516 T6k = T5g + T5h;
Chris@42 517 T5i = T5g - T5h;
Chris@42 518 Cr[WS(csr, 13)] = FMA(KP970031253, T32, T2Z);
Chris@42 519 Cr[WS(csr, 18)] = FNMS(KP970031253, T32, T2Z);
Chris@42 520 Ci[WS(csi, 13)] = FNMS(KP970031253, T6U, T6T);
Chris@42 521 Ci[WS(csi, 18)] = -(FMA(KP970031253, T6U, T6T));
Chris@42 522 T5j = FNMS(KP980785280, T5i, T5f);
Chris@42 523 T5L = FMA(KP980785280, T5i, T5f);
Chris@42 524 T66 = T3W + T3Z;
Chris@42 525 T40 = T3W - T3Z;
Chris@42 526 T5M = FNMS(KP098491403, T5k, T5l);
Chris@42 527 T5m = FMA(KP098491403, T5l, T5k);
Chris@42 528 T5p = FMA(KP098491403, T5o, T5n);
Chris@42 529 T5N = FNMS(KP098491403, T5n, T5o);
Chris@42 530 T5B = FNMS(KP980785280, T5A, T5z);
Chris@42 531 T5T = FMA(KP980785280, T5A, T5z);
Chris@42 532 T6d = FNMS(KP980785280, T66, T65);
Chris@42 533 T67 = FMA(KP980785280, T66, T65);
Chris@42 534 T68 = T5m + T5p;
Chris@42 535 T5q = T5m - T5p;
Chris@42 536 T5U = FMA(KP980785280, T5D, T5C);
Chris@42 537 T5E = FNMS(KP980785280, T5D, T5C);
Chris@42 538 }
Chris@42 539 {
Chris@42 540 E T5Q, T5R, T5u, T5x;
Chris@42 541 T5Q = FMA(KP980785280, T5t, T5s);
Chris@42 542 T5u = FNMS(KP980785280, T5t, T5s);
Chris@42 543 T5x = FNMS(KP980785280, T5w, T5v);
Chris@42 544 T5R = FMA(KP980785280, T5w, T5v);
Chris@42 545 T5Z = FNMS(KP049126849, T5T, T5U);
Chris@42 546 T5V = FMA(KP049126849, T5U, T5T);
Chris@42 547 T5J = FNMS(KP906347169, T5u, T5x);
Chris@42 548 T5y = FMA(KP906347169, T5x, T5u);
Chris@42 549 T6e = T5M + T5N;
Chris@42 550 T5O = T5M - T5N;
Chris@42 551 T5Y = FMA(KP049126849, T5Q, T5R);
Chris@42 552 T5S = FNMS(KP049126849, T5R, T5Q);
Chris@42 553 }
Chris@42 554 }
Chris@42 555 {
Chris@42 556 E T5H, T5r, T6f, T6h, T5I, T5F;
Chris@42 557 T5H = FNMS(KP995184726, T5q, T5j);
Chris@42 558 T5r = FMA(KP995184726, T5q, T5j);
Chris@42 559 T6f = FNMS(KP995184726, T6e, T6d);
Chris@42 560 T6h = FMA(KP995184726, T6e, T6d);
Chris@42 561 T5I = FMA(KP906347169, T5B, T5E);
Chris@42 562 T5F = FNMS(KP906347169, T5E, T5B);
Chris@42 563 {
Chris@42 564 E T5P, T5W, T69, T6a;
Chris@42 565 T5X = FNMS(KP995184726, T5O, T5L);
Chris@42 566 T5P = FMA(KP995184726, T5O, T5L);
Chris@42 567 {
Chris@42 568 E T5K, T6g, T6i, T5G;
Chris@42 569 T5K = T5I - T5J;
Chris@42 570 T6g = T5J + T5I;
Chris@42 571 T6i = T5F - T5y;
Chris@42 572 T5G = T5y + T5F;
Chris@42 573 Cr[WS(csr, 8)] = FMA(KP740951125, T5K, T5H);
Chris@42 574 Cr[WS(csr, 23)] = FNMS(KP740951125, T5K, T5H);
Chris@42 575 Ci[WS(csi, 7)] = FMA(KP740951125, T6g, T6f);
Chris@42 576 Ci[WS(csi, 24)] = FMS(KP740951125, T6g, T6f);
Chris@42 577 Ci[WS(csi, 23)] = FMA(KP740951125, T6i, T6h);
Chris@42 578 Ci[WS(csi, 8)] = FMS(KP740951125, T6i, T6h);
Chris@42 579 Cr[WS(csr, 7)] = FMA(KP740951125, T5G, T5r);
Chris@42 580 Cr[WS(csr, 24)] = FNMS(KP740951125, T5G, T5r);
Chris@42 581 T5W = T5S + T5V;
Chris@42 582 T6c = T5V - T5S;
Chris@42 583 }
Chris@42 584 T6b = FNMS(KP995184726, T68, T67);
Chris@42 585 T69 = FMA(KP995184726, T68, T67);
Chris@42 586 T6a = T5Y + T5Z;
Chris@42 587 T60 = T5Y - T5Z;
Chris@42 588 Cr[0] = FMA(KP998795456, T5W, T5P);
Chris@42 589 Cr[WS(csr, 31)] = FNMS(KP998795456, T5W, T5P);
Chris@42 590 Ci[WS(csi, 31)] = FNMS(KP998795456, T6a, T69);
Chris@42 591 Ci[0] = -(FMA(KP998795456, T6a, T69));
Chris@42 592 }
Chris@42 593 }
Chris@42 594 }
Chris@42 595 }
Chris@42 596 {
Chris@42 597 E T3L, T6G, T6F, T3O;
Chris@42 598 {
Chris@42 599 E T37, T3z, T3p, T6H, T6B, T6C, T3e, T3s, T3M, T3J, T3w, T3m, T6I, T3C, T3N;
Chris@42 600 E T3G;
Chris@42 601 {
Chris@42 602 E T3B, T3A, T3H, T3I, T3a, T3d;
Chris@42 603 Cr[WS(csr, 15)] = FMA(KP998795456, T60, T5X);
Chris@42 604 Cr[WS(csr, 16)] = FNMS(KP998795456, T60, T5X);
Chris@42 605 Ci[WS(csi, 15)] = FMA(KP998795456, T6c, T6b);
Chris@42 606 Ci[WS(csi, 16)] = FMS(KP998795456, T6c, T6b);
Chris@42 607 T37 = FNMS(KP831469612, T36, T33);
Chris@42 608 T3z = FMA(KP831469612, T36, T33);
Chris@42 609 T3B = FMA(KP303346683, T38, T39);
Chris@42 610 T3a = FNMS(KP303346683, T39, T38);
Chris@42 611 T3d = FNMS(KP303346683, T3c, T3b);
Chris@42 612 T3A = FMA(KP303346683, T3b, T3c);
Chris@42 613 T3p = FMA(KP831469612, T3o, T3n);
Chris@42 614 T3H = FNMS(KP831469612, T3o, T3n);
Chris@42 615 T6H = FNMS(KP831469612, T6A, T6z);
Chris@42 616 T6B = FMA(KP831469612, T6A, T6z);
Chris@42 617 T6C = T3d + T3a;
Chris@42 618 T3e = T3a - T3d;
Chris@42 619 T3I = FMA(KP831469612, T3r, T3q);
Chris@42 620 T3s = FNMS(KP831469612, T3r, T3q);
Chris@42 621 {
Chris@42 622 E T3E, T3F, T3i, T3l;
Chris@42 623 T3E = FMA(KP831469612, T3h, T3g);
Chris@42 624 T3i = FNMS(KP831469612, T3h, T3g);
Chris@42 625 T3l = FNMS(KP831469612, T3k, T3j);
Chris@42 626 T3F = FMA(KP831469612, T3k, T3j);
Chris@42 627 T3M = FNMS(KP148335987, T3H, T3I);
Chris@42 628 T3J = FMA(KP148335987, T3I, T3H);
Chris@42 629 T3w = FMA(KP741650546, T3i, T3l);
Chris@42 630 T3m = FNMS(KP741650546, T3l, T3i);
Chris@42 631 T6I = T3A + T3B;
Chris@42 632 T3C = T3A - T3B;
Chris@42 633 T3N = FNMS(KP148335987, T3E, T3F);
Chris@42 634 T3G = FMA(KP148335987, T3F, T3E);
Chris@42 635 }
Chris@42 636 }
Chris@42 637 {
Chris@42 638 E T3v, T3f, T6J, T6L, T3x, T3t;
Chris@42 639 T3v = FNMS(KP956940335, T3e, T37);
Chris@42 640 T3f = FMA(KP956940335, T3e, T37);
Chris@42 641 T6J = FMA(KP956940335, T6I, T6H);
Chris@42 642 T6L = FNMS(KP956940335, T6I, T6H);
Chris@42 643 T3x = FMA(KP741650546, T3p, T3s);
Chris@42 644 T3t = FNMS(KP741650546, T3s, T3p);
Chris@42 645 {
Chris@42 646 E T3D, T3K, T6D, T6E;
Chris@42 647 T3L = FNMS(KP956940335, T3C, T3z);
Chris@42 648 T3D = FMA(KP956940335, T3C, T3z);
Chris@42 649 {
Chris@42 650 E T3y, T6K, T6M, T3u;
Chris@42 651 T3y = T3w - T3x;
Chris@42 652 T6K = T3w + T3x;
Chris@42 653 T6M = T3m + T3t;
Chris@42 654 T3u = T3m - T3t;
Chris@42 655 Cr[WS(csr, 9)] = FMA(KP803207531, T3y, T3v);
Chris@42 656 Cr[WS(csr, 22)] = FNMS(KP803207531, T3y, T3v);
Chris@42 657 Ci[WS(csi, 25)] = FNMS(KP803207531, T6K, T6J);
Chris@42 658 Ci[WS(csi, 6)] = -(FMA(KP803207531, T6K, T6J));
Chris@42 659 Ci[WS(csi, 9)] = FNMS(KP803207531, T6M, T6L);
Chris@42 660 Ci[WS(csi, 22)] = -(FMA(KP803207531, T6M, T6L));
Chris@42 661 Cr[WS(csr, 6)] = FMA(KP803207531, T3u, T3f);
Chris@42 662 Cr[WS(csr, 25)] = FNMS(KP803207531, T3u, T3f);
Chris@42 663 T3K = T3G - T3J;
Chris@42 664 T6G = T3G + T3J;
Chris@42 665 }
Chris@42 666 T6F = FNMS(KP956940335, T6C, T6B);
Chris@42 667 T6D = FMA(KP956940335, T6C, T6B);
Chris@42 668 T6E = T3N + T3M;
Chris@42 669 T3O = T3M - T3N;
Chris@42 670 Cr[WS(csr, 1)] = FMA(KP989176509, T3K, T3D);
Chris@42 671 Cr[WS(csr, 30)] = FNMS(KP989176509, T3K, T3D);
Chris@42 672 Ci[WS(csi, 1)] = FMA(KP989176509, T6E, T6D);
Chris@42 673 Ci[WS(csi, 30)] = FMS(KP989176509, T6E, T6D);
Chris@42 674 }
Chris@42 675 }
Chris@42 676 }
Chris@42 677 {
Chris@42 678 E T41, T4Z, T4L, T6r, T6l, T6m, T4g, T4S, T5c, T59, T4W, T4A, T6s, T52, T5d;
Chris@42 679 E T56;
Chris@42 680 {
Chris@42 681 E T51, T50, T57, T58, T48, T4f;
Chris@42 682 Cr[WS(csr, 14)] = FMA(KP989176509, T3O, T3L);
Chris@42 683 Cr[WS(csr, 17)] = FNMS(KP989176509, T3O, T3L);
Chris@42 684 Ci[WS(csi, 17)] = FNMS(KP989176509, T6G, T6F);
Chris@42 685 Ci[WS(csi, 14)] = -(FMA(KP989176509, T6G, T6F));
Chris@42 686 T41 = FNMS(KP980785280, T40, T3T);
Chris@42 687 T4Z = FMA(KP980785280, T40, T3T);
Chris@42 688 T51 = FMA(KP820678790, T44, T47);
Chris@42 689 T48 = FNMS(KP820678790, T47, T44);
Chris@42 690 T4f = FNMS(KP820678790, T4e, T4b);
Chris@42 691 T50 = FMA(KP820678790, T4b, T4e);
Chris@42 692 T4L = FNMS(KP980785280, T4K, T4D);
Chris@42 693 T57 = FMA(KP980785280, T4K, T4D);
Chris@42 694 T6r = FMA(KP980785280, T6k, T6j);
Chris@42 695 T6l = FNMS(KP980785280, T6k, T6j);
Chris@42 696 T6m = T4f + T48;
Chris@42 697 T4g = T48 - T4f;
Chris@42 698 T58 = FMA(KP980785280, T4R, T4O);
Chris@42 699 T4S = FNMS(KP980785280, T4R, T4O);
Chris@42 700 {
Chris@42 701 E T54, T55, T4s, T4z;
Chris@42 702 T54 = FMA(KP980785280, T4r, T4k);
Chris@42 703 T4s = FNMS(KP980785280, T4r, T4k);
Chris@42 704 T4z = FNMS(KP980785280, T4y, T4v);
Chris@42 705 T55 = FMA(KP980785280, T4y, T4v);
Chris@42 706 T5c = FMA(KP357805721, T57, T58);
Chris@42 707 T59 = FNMS(KP357805721, T58, T57);
Chris@42 708 T4W = FMA(KP472964775, T4s, T4z);
Chris@42 709 T4A = FNMS(KP472964775, T4z, T4s);
Chris@42 710 T6s = T50 + T51;
Chris@42 711 T52 = T50 - T51;
Chris@42 712 T5d = FNMS(KP357805721, T54, T55);
Chris@42 713 T56 = FMA(KP357805721, T55, T54);
Chris@42 714 }
Chris@42 715 }
Chris@42 716 {
Chris@42 717 E T4V, T4h, T6t, T6v, T4X, T4T;
Chris@42 718 T4V = FNMS(KP773010453, T4g, T41);
Chris@42 719 T4h = FMA(KP773010453, T4g, T41);
Chris@42 720 T6t = FMA(KP773010453, T6s, T6r);
Chris@42 721 T6v = FNMS(KP773010453, T6s, T6r);
Chris@42 722 T4X = FNMS(KP472964775, T4L, T4S);
Chris@42 723 T4T = FMA(KP472964775, T4S, T4L);
Chris@42 724 {
Chris@42 725 E T53, T5a, T6n, T6o;
Chris@42 726 T5b = FNMS(KP773010453, T52, T4Z);
Chris@42 727 T53 = FMA(KP773010453, T52, T4Z);
Chris@42 728 {
Chris@42 729 E T4Y, T6u, T6w, T4U;
Chris@42 730 T4Y = T4W - T4X;
Chris@42 731 T6u = T4W + T4X;
Chris@42 732 T6w = T4T - T4A;
Chris@42 733 T4U = T4A + T4T;
Chris@42 734 Cr[WS(csr, 11)] = FMA(KP903989293, T4Y, T4V);
Chris@42 735 Cr[WS(csr, 20)] = FNMS(KP903989293, T4Y, T4V);
Chris@42 736 Ci[WS(csi, 27)] = FNMS(KP903989293, T6u, T6t);
Chris@42 737 Ci[WS(csi, 4)] = -(FMA(KP903989293, T6u, T6t));
Chris@42 738 Ci[WS(csi, 11)] = FMA(KP903989293, T6w, T6v);
Chris@42 739 Ci[WS(csi, 20)] = FMS(KP903989293, T6w, T6v);
Chris@42 740 Cr[WS(csr, 4)] = FMA(KP903989293, T4U, T4h);
Chris@42 741 Cr[WS(csr, 27)] = FNMS(KP903989293, T4U, T4h);
Chris@42 742 T5a = T56 + T59;
Chris@42 743 T6q = T59 - T56;
Chris@42 744 }
Chris@42 745 T6p = FNMS(KP773010453, T6m, T6l);
Chris@42 746 T6n = FMA(KP773010453, T6m, T6l);
Chris@42 747 T6o = T5d + T5c;
Chris@42 748 T5e = T5c - T5d;
Chris@42 749 Cr[WS(csr, 3)] = FMA(KP941544065, T5a, T53);
Chris@42 750 Cr[WS(csr, 28)] = FNMS(KP941544065, T5a, T53);
Chris@42 751 Ci[WS(csi, 3)] = FMA(KP941544065, T6o, T6n);
Chris@42 752 Ci[WS(csi, 28)] = FMS(KP941544065, T6o, T6n);
Chris@42 753 }
Chris@42 754 }
Chris@42 755 }
Chris@42 756 }
Chris@42 757 }
Chris@42 758 }
/* Last four stores of the pass, built from the loop-scope temporaries
   T5b, T5e, T6p and T6q. */
Chris@42 759 Cr[WS(csr, 12)] = FMA(KP941544065, T5e, T5b);
Chris@42 760 Cr[WS(csr, 19)] = FNMS(KP941544065, T5e, T5b);
Chris@42 761 Ci[WS(csi, 19)] = FMA(KP941544065, T6q, T6p);
Chris@42 762 Ci[WS(csi, 12)] = FMS(KP941544065, T6q, T6p);
Chris@42 763 }
Chris@42 764 }
Chris@42 765 }
Chris@42 766
/* Descriptor handed to X(kr2c_register) together with the HAVE_FMA r2cfII_64
   codelet.  The 64 and the name string match the codelet; 114 / 0 / 320 equal
   the addition / multiplication / fused multiply-add counts quoted in this
   variant's operation-count comment.
   NOTE(review): the field layout of kr2c_desc and the meaning of GENUS are
   declared outside this file -- confirm against the FFTW headers. */
Chris@42 767 static const kr2c_desc desc = { 64, "r2cfII_64", {114, 0, 320, 0}, &GENUS };
Chris@42 768
/* Registration entry point for the HAVE_FMA variant: passes planner p, the
   r2cfII_64 codelet and its descriptor to X(kr2c_register).
   NOTE(review): X() is a macro defined elsewhere (presumably it decorates the
   identifier per build configuration) -- confirm. */
Chris@42 769 void X(codelet_r2cfII_64) (planner *p) {
Chris@42 770 X(kr2c_register) (p, r2cfII_64, &desc);
Chris@42 771 }
Chris@42 772
Chris@42 773 #else /* HAVE_FMA */
Chris@42 774
Chris@42 775 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cfII_64 -dft-II -include r2cfII.h */
Chris@42 776
Chris@42 777 /*
Chris@42 778 * This function contains 434 FP additions, 206 FP multiplications,
Chris@42 779 * (or, 342 additions, 114 multiplications, 92 fused multiply/add),
Chris@42 780 * 118 stack variables, 31 constants, and 128 memory accesses
Chris@42 781 */
Chris@42 782 #include "r2cfII.h"
Chris@42 783
/*
 * r2cfII_64, variant compiled when HAVE_FMA is not defined.
 *
 * Straight-line size-64 kernel emitted by genfft's gen_r2cf with
 * "-n 64 -dft-II" (see the "Generated by" comment above).  The file header
 * marks this code as generated and not to be edited by hand; regenerate it
 * instead of modifying the arithmetic.
 *
 * Each pass of the loop reads 32 values from R0 and 32 from R1 (indices
 * WS(rs, 0) .. WS(rs, 31)) and stores 32 values into Cr (indices
 * WS(csr, 0) .. WS(csr, 31)) and 32 into Ci (indices WS(csi, 0) ..
 * WS(csi, 31)).  The loop runs v times; after every pass R0 and R1 advance
 * by ivs and Cr and Ci advance by ovs.
 *
 * NOTE(review): R0/R1 look like the two interleaved halves of the real input
 * and Cr/Ci like the real/imaginary outputs -- confirm against
 * codelet-rdft.h.  R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are all defined outside this file.
 */
Chris@42 784 static void r2cfII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 785 {
/* Multiplicative constants; each name spells the leading digits of its value
 * (e.g. KP707106781 is 0.707106781..., i.e. sqrt(2)/2). */
Chris@42 786 DK(KP242980179, +0.242980179903263889948274162077471118320990783);
Chris@42 787 DK(KP970031253, +0.970031253194543992603984207286100251456865962);
Chris@42 788 DK(KP857728610, +0.857728610000272069902269984284770137042490799);
Chris@42 789 DK(KP514102744, +0.514102744193221726593693838968815772608049120);
Chris@42 790 DK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@42 791 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 792 DK(KP427555093, +0.427555093430282094320966856888798534304578629);
Chris@42 793 DK(KP903989293, +0.903989293123443331586200297230537048710132025);
Chris@42 794 DK(KP336889853, +0.336889853392220050689253212619147570477766780);
Chris@42 795 DK(KP941544065, +0.941544065183020778412509402599502357185589796);
Chris@42 796 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 797 DK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@42 798 DK(KP595699304, +0.595699304492433343467036528829969889511926338);
Chris@42 799 DK(KP803207531, +0.803207531480644909806676512963141923879569427);
Chris@42 800 DK(KP146730474, +0.146730474455361751658850129646717819706215317);
Chris@42 801 DK(KP989176509, +0.989176509964780973451673738016243063983689533);
Chris@42 802 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 803 DK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@42 804 DK(KP049067674, +0.049067674327418014254954976942682658314745363);
Chris@42 805 DK(KP998795456, +0.998795456205172392714771604759100694443203615);
Chris@42 806 DK(KP671558954, +0.671558954847018400625376850427421803228750632);
Chris@42 807 DK(KP740951125, +0.740951125354959091175616897495162729728955309);
Chris@42 808 DK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@42 809 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 810 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 811 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 812 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 813 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 814 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 815 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 816 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 817 {
Chris@42 818 INT i;
/* One transform per pass; the pointer bumps live in the loop's increment
 * expression, so the body itself never modifies R0, R1, Cr or Ci. */
Chris@42 819 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* Intermediates produced by the load blocks below and consumed by the four
 * output stages at the end of the loop body. */
Chris@42 820 E Tm, T34, T3Z, T5g, Tv, T35, T3W, T5h, Td, T33, T6B, T6Q, T3T, T5f, T68;
Chris@42 821 E T6m, T2b, T3n, T4O, T5D, T2F, T3r, T4K, T5z, TK, T3c, T47, T5n, TR, T3b;
Chris@42 822 E T44, T5o, T15, T38, T4e, T5l, T1c, T39, T4b, T5k, T1s, T3g, T4v, T5w, T1W;
Chris@42 823 E T3k, T4k, T5s, T2u, T3q, T4R, T5A, T2y, T3o, T4H, T5C, T1L, T3j, T4y, T5t;
Chris@42 824 E T1P, T3h, T4r, T5v;
/* Load R0[2], R0[18], R0[10], R0[26]; yields Tm, T34, T3Z, T5g. */
Chris@42 825 {
Chris@42 826 E Te, Tk, Th, Tj, Tf, Tg;
Chris@42 827 Te = R0[WS(rs, 2)];
Chris@42 828 Tk = R0[WS(rs, 18)];
Chris@42 829 Tf = R0[WS(rs, 10)];
Chris@42 830 Tg = R0[WS(rs, 26)];
Chris@42 831 Th = KP707106781 * (Tf - Tg);
Chris@42 832 Tj = KP707106781 * (Tf + Tg);
Chris@42 833 {
Chris@42 834 E Ti, Tl, T3X, T3Y;
Chris@42 835 Ti = Te + Th;
Chris@42 836 Tl = Tj + Tk;
Chris@42 837 Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
Chris@42 838 T34 = FMA(KP195090322, Ti, KP980785280 * Tl);
Chris@42 839 T3X = Tk - Tj;
Chris@42 840 T3Y = Te - Th;
Chris@42 841 T3Z = FNMS(KP555570233, T3Y, KP831469612 * T3X);
Chris@42 842 T5g = FMA(KP831469612, T3Y, KP555570233 * T3X);
Chris@42 843 }
Chris@42 844 }
/* Load R0[30], R0[14], R0[6], R0[22]; yields Tv, T35, T3W, T5h. */
Chris@42 845 {
Chris@42 846 E Tq, Tt, Tp, Ts, Tn, To;
Chris@42 847 Tq = R0[WS(rs, 30)];
Chris@42 848 Tt = R0[WS(rs, 14)];
Chris@42 849 Tn = R0[WS(rs, 6)];
Chris@42 850 To = R0[WS(rs, 22)];
Chris@42 851 Tp = KP707106781 * (Tn - To);
Chris@42 852 Ts = KP707106781 * (Tn + To);
Chris@42 853 {
Chris@42 854 E Tr, Tu, T3U, T3V;
Chris@42 855 Tr = Tp - Tq;
Chris@42 856 Tu = Ts + Tt;
Chris@42 857 Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
Chris@42 858 T35 = FNMS(KP980785280, Tu, KP195090322 * Tr);
Chris@42 859 T3U = Tt - Ts;
Chris@42 860 T3V = Tp + Tq;
Chris@42 861 T3W = FNMS(KP555570233, T3V, KP831469612 * T3U);
Chris@42 862 T5h = FMA(KP831469612, T3V, KP555570233 * T3U);
Chris@42 863 }
Chris@42 864 }
/* Load R0[0], R0[16], R0[8], R0[24], R0[4], R0[20], R0[12], R0[28];
 * yields Td, T33, T6B, T6Q, T3T, T5f, T68, T6m. */
Chris@42 865 {
Chris@42 866 E T1, T66, T4, T65, T8, T3Q, Tb, T3R, T2, T3;
Chris@42 867 T1 = R0[0];
Chris@42 868 T66 = R0[WS(rs, 16)];
Chris@42 869 T2 = R0[WS(rs, 8)];
Chris@42 870 T3 = R0[WS(rs, 24)];
Chris@42 871 T4 = KP707106781 * (T2 - T3);
Chris@42 872 T65 = KP707106781 * (T2 + T3);
Chris@42 873 {
Chris@42 874 E T6, T7, T9, Ta;
Chris@42 875 T6 = R0[WS(rs, 4)];
Chris@42 876 T7 = R0[WS(rs, 20)];
Chris@42 877 T8 = FNMS(KP382683432, T7, KP923879532 * T6);
Chris@42 878 T3Q = FMA(KP382683432, T6, KP923879532 * T7);
Chris@42 879 T9 = R0[WS(rs, 12)];
Chris@42 880 Ta = R0[WS(rs, 28)];
Chris@42 881 Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
Chris@42 882 T3R = FMA(KP923879532, T9, KP382683432 * Ta);
Chris@42 883 }
Chris@42 884 {
Chris@42 885 E T5, Tc, T6z, T6A;
Chris@42 886 T5 = T1 + T4;
Chris@42 887 Tc = T8 + Tb;
Chris@42 888 Td = T5 + Tc;
Chris@42 889 T33 = T5 - Tc;
Chris@42 890 T6z = Tb - T8;
Chris@42 891 T6A = T66 - T65;
Chris@42 892 T6B = T6z - T6A;
Chris@42 893 T6Q = T6z + T6A;
Chris@42 894 }
Chris@42 895 {
Chris@42 896 E T3P, T3S, T64, T67;
Chris@42 897 T3P = T1 - T4;
Chris@42 898 T3S = T3Q - T3R;
Chris@42 899 T3T = T3P - T3S;
Chris@42 900 T5f = T3P + T3S;
Chris@42 901 T64 = T3Q + T3R;
Chris@42 902 T67 = T65 + T66;
Chris@42 903 T68 = T64 + T67;
Chris@42 904 T6m = T67 - T64;
Chris@42 905 }
Chris@42 906 }
/* Load R1[31], R1[15], R1[7], R1[23], R1[3], R1[19], R1[11], R1[27];
 * yields T2b, T3n, T4O, T5D, T2F, T3r, T4K, T5z. */
Chris@42 907 {
Chris@42 908 E T22, T2D, T21, T2C, T26, T2z, T29, T2A, T1Z, T20;
Chris@42 909 T22 = R1[WS(rs, 31)];
Chris@42 910 T2D = R1[WS(rs, 15)];
Chris@42 911 T1Z = R1[WS(rs, 7)];
Chris@42 912 T20 = R1[WS(rs, 23)];
Chris@42 913 T21 = KP707106781 * (T1Z - T20);
Chris@42 914 T2C = KP707106781 * (T1Z + T20);
Chris@42 915 {
Chris@42 916 E T24, T25, T27, T28;
Chris@42 917 T24 = R1[WS(rs, 3)];
Chris@42 918 T25 = R1[WS(rs, 19)];
Chris@42 919 T26 = FNMS(KP382683432, T25, KP923879532 * T24);
Chris@42 920 T2z = FMA(KP382683432, T24, KP923879532 * T25);
Chris@42 921 T27 = R1[WS(rs, 11)];
Chris@42 922 T28 = R1[WS(rs, 27)];
Chris@42 923 T29 = FNMS(KP923879532, T28, KP382683432 * T27);
Chris@42 924 T2A = FMA(KP923879532, T27, KP382683432 * T28);
Chris@42 925 }
Chris@42 926 {
Chris@42 927 E T23, T2a, T4M, T4N;
Chris@42 928 T23 = T21 - T22;
Chris@42 929 T2a = T26 + T29;
Chris@42 930 T2b = T23 + T2a;
Chris@42 931 T3n = T23 - T2a;
Chris@42 932 T4M = T29 - T26;
Chris@42 933 T4N = T2D - T2C;
Chris@42 934 T4O = T4M - T4N;
Chris@42 935 T5D = T4M + T4N;
Chris@42 936 }
Chris@42 937 {
Chris@42 938 E T2B, T2E, T4I, T4J;
Chris@42 939 T2B = T2z + T2A;
Chris@42 940 T2E = T2C + T2D;
Chris@42 941 T2F = T2B + T2E;
Chris@42 942 T3r = T2E - T2B;
Chris@42 943 T4I = T21 + T22;
Chris@42 944 T4J = T2z - T2A;
Chris@42 945 T4K = T4I + T4J;
Chris@42 946 T5z = T4J - T4I;
Chris@42 947 }
Chris@42 948 }
/* Load R0[1], R0[17], R0[9], R0[25], R0[5], R0[21], R0[13], R0[29];
 * yields TK, T3c, T47, T5n, TR, T3b, T44, T5o. */
Chris@42 949 {
Chris@42 950 E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
Chris@42 951 Ty = R0[WS(rs, 1)];
Chris@42 952 TP = R0[WS(rs, 17)];
Chris@42 953 Tz = R0[WS(rs, 9)];
Chris@42 954 TA = R0[WS(rs, 25)];
Chris@42 955 TB = KP707106781 * (Tz - TA);
Chris@42 956 TO = KP707106781 * (Tz + TA);
Chris@42 957 {
Chris@42 958 E TD, TE, TG, TH;
Chris@42 959 TD = R0[WS(rs, 5)];
Chris@42 960 TE = R0[WS(rs, 21)];
Chris@42 961 TF = FNMS(KP382683432, TE, KP923879532 * TD);
Chris@42 962 TL = FMA(KP382683432, TD, KP923879532 * TE);
Chris@42 963 TG = R0[WS(rs, 13)];
Chris@42 964 TH = R0[WS(rs, 29)];
Chris@42 965 TI = FNMS(KP923879532, TH, KP382683432 * TG);
Chris@42 966 TM = FMA(KP923879532, TG, KP382683432 * TH);
Chris@42 967 }
Chris@42 968 {
Chris@42 969 E TC, TJ, T45, T46;
Chris@42 970 TC = Ty + TB;
Chris@42 971 TJ = TF + TI;
Chris@42 972 TK = TC + TJ;
Chris@42 973 T3c = TC - TJ;
Chris@42 974 T45 = TI - TF;
Chris@42 975 T46 = TP - TO;
Chris@42 976 T47 = T45 - T46;
Chris@42 977 T5n = T45 + T46;
Chris@42 978 }
Chris@42 979 {
Chris@42 980 E TN, TQ, T42, T43;
Chris@42 981 TN = TL + TM;
Chris@42 982 TQ = TO + TP;
Chris@42 983 TR = TN + TQ;
Chris@42 984 T3b = TQ - TN;
Chris@42 985 T42 = Ty - TB;
Chris@42 986 T43 = TL - TM;
Chris@42 987 T44 = T42 - T43;
Chris@42 988 T5o = T42 + T43;
Chris@42 989 }
Chris@42 990 }
/* Load R0[31], R0[15], R0[7], R0[23], R0[3], R0[19], R0[11], R0[27];
 * yields T15, T38, T4e, T5l, T1c, T39, T4b, T5k. */
Chris@42 991 {
Chris@42 992 E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
Chris@42 993 TW = R0[WS(rs, 31)];
Chris@42 994 T1a = R0[WS(rs, 15)];
Chris@42 995 TT = R0[WS(rs, 7)];
Chris@42 996 TU = R0[WS(rs, 23)];
Chris@42 997 TV = KP707106781 * (TT - TU);
Chris@42 998 T19 = KP707106781 * (TT + TU);
Chris@42 999 {
Chris@42 1000 E TY, TZ, T11, T12;
Chris@42 1001 TY = R0[WS(rs, 3)];
Chris@42 1002 TZ = R0[WS(rs, 19)];
Chris@42 1003 T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
Chris@42 1004 T16 = FMA(KP382683432, TY, KP923879532 * TZ);
Chris@42 1005 T11 = R0[WS(rs, 11)];
Chris@42 1006 T12 = R0[WS(rs, 27)];
Chris@42 1007 T13 = FNMS(KP923879532, T12, KP382683432 * T11);
Chris@42 1008 T17 = FMA(KP923879532, T11, KP382683432 * T12);
Chris@42 1009 }
Chris@42 1010 {
Chris@42 1011 E TX, T14, T4c, T4d;
Chris@42 1012 TX = TV - TW;
Chris@42 1013 T14 = T10 + T13;
Chris@42 1014 T15 = TX + T14;
Chris@42 1015 T38 = TX - T14;
Chris@42 1016 T4c = T13 - T10;
Chris@42 1017 T4d = T1a - T19;
Chris@42 1018 T4e = T4c - T4d;
Chris@42 1019 T5l = T4c + T4d;
Chris@42 1020 }
Chris@42 1021 {
Chris@42 1022 E T18, T1b, T49, T4a;
Chris@42 1023 T18 = T16 + T17;
Chris@42 1024 T1b = T19 + T1a;
Chris@42 1025 T1c = T18 + T1b;
Chris@42 1026 T39 = T1b - T18;
Chris@42 1027 T49 = TV + TW;
Chris@42 1028 T4a = T16 - T17;
Chris@42 1029 T4b = T49 + T4a;
Chris@42 1030 T5k = T4a - T49;
Chris@42 1031 }
Chris@42 1032 }
/* Load R1[0], R1[16], R1[8], R1[24], R1[4], R1[20], R1[12], R1[28];
 * yields T1s, T3g, T4v, T5w, T1W, T3k, T4k, T5s. */
Chris@42 1033 {
Chris@42 1034 E T1g, T1U, T1j, T1T, T1n, T1Q, T1q, T1R, T1h, T1i;
Chris@42 1035 T1g = R1[0];
Chris@42 1036 T1U = R1[WS(rs, 16)];
Chris@42 1037 T1h = R1[WS(rs, 8)];
Chris@42 1038 T1i = R1[WS(rs, 24)];
Chris@42 1039 T1j = KP707106781 * (T1h - T1i);
Chris@42 1040 T1T = KP707106781 * (T1h + T1i);
Chris@42 1041 {
Chris@42 1042 E T1l, T1m, T1o, T1p;
Chris@42 1043 T1l = R1[WS(rs, 4)];
Chris@42 1044 T1m = R1[WS(rs, 20)];
Chris@42 1045 T1n = FNMS(KP382683432, T1m, KP923879532 * T1l);
Chris@42 1046 T1Q = FMA(KP382683432, T1l, KP923879532 * T1m);
Chris@42 1047 T1o = R1[WS(rs, 12)];
Chris@42 1048 T1p = R1[WS(rs, 28)];
Chris@42 1049 T1q = FNMS(KP923879532, T1p, KP382683432 * T1o);
Chris@42 1050 T1R = FMA(KP923879532, T1o, KP382683432 * T1p);
Chris@42 1051 }
Chris@42 1052 {
Chris@42 1053 E T1k, T1r, T4t, T4u;
Chris@42 1054 T1k = T1g + T1j;
Chris@42 1055 T1r = T1n + T1q;
Chris@42 1056 T1s = T1k + T1r;
Chris@42 1057 T3g = T1k - T1r;
Chris@42 1058 T4t = T1q - T1n;
Chris@42 1059 T4u = T1U - T1T;
Chris@42 1060 T4v = T4t - T4u;
Chris@42 1061 T5w = T4t + T4u;
Chris@42 1062 }
Chris@42 1063 {
Chris@42 1064 E T1S, T1V, T4i, T4j;
Chris@42 1065 T1S = T1Q + T1R;
Chris@42 1066 T1V = T1T + T1U;
Chris@42 1067 T1W = T1S + T1V;
Chris@42 1068 T3k = T1V - T1S;
Chris@42 1069 T4i = T1g - T1j;
Chris@42 1070 T4j = T1Q - T1R;
Chris@42 1071 T4k = T4i - T4j;
Chris@42 1072 T5s = T4i + T4j;
Chris@42 1073 }
Chris@42 1074 }
/* Load R1[1], R1[17], R1[9], R1[25], R1[29], R1[13], R1[5], R1[21];
 * yields T2u, T3q, T4R, T5A, T2y, T3o, T4H, T5C. */
Chris@42 1075 {
Chris@42 1076 E T2g, T4F, T2j, T4E, T2p, T4C, T2s, T4B;
Chris@42 1077 {
Chris@42 1078 E T2c, T2i, T2f, T2h, T2d, T2e;
Chris@42 1079 T2c = R1[WS(rs, 1)];
Chris@42 1080 T2i = R1[WS(rs, 17)];
Chris@42 1081 T2d = R1[WS(rs, 9)];
Chris@42 1082 T2e = R1[WS(rs, 25)];
Chris@42 1083 T2f = KP707106781 * (T2d - T2e);
Chris@42 1084 T2h = KP707106781 * (T2d + T2e);
Chris@42 1085 T2g = T2c + T2f;
Chris@42 1086 T4F = T2c - T2f;
Chris@42 1087 T2j = T2h + T2i;
Chris@42 1088 T4E = T2i - T2h;
Chris@42 1089 }
Chris@42 1090 {
Chris@42 1091 E T2o, T2r, T2n, T2q, T2l, T2m;
Chris@42 1092 T2o = R1[WS(rs, 29)];
Chris@42 1093 T2r = R1[WS(rs, 13)];
Chris@42 1094 T2l = R1[WS(rs, 5)];
Chris@42 1095 T2m = R1[WS(rs, 21)];
Chris@42 1096 T2n = KP707106781 * (T2l - T2m);
Chris@42 1097 T2q = KP707106781 * (T2l + T2m);
Chris@42 1098 T2p = T2n - T2o;
Chris@42 1099 T4C = T2n + T2o;
Chris@42 1100 T2s = T2q + T2r;
Chris@42 1101 T4B = T2r - T2q;
Chris@42 1102 }
Chris@42 1103 {
Chris@42 1104 E T2k, T2t, T4P, T4Q;
Chris@42 1105 T2k = FNMS(KP195090322, T2j, KP980785280 * T2g);
Chris@42 1106 T2t = FMA(KP980785280, T2p, KP195090322 * T2s);
Chris@42 1107 T2u = T2k + T2t;
Chris@42 1108 T3q = T2t - T2k;
Chris@42 1109 T4P = FMA(KP831469612, T4F, KP555570233 * T4E);
Chris@42 1110 T4Q = FMA(KP831469612, T4C, KP555570233 * T4B);
Chris@42 1111 T4R = T4P + T4Q;
Chris@42 1112 T5A = T4P - T4Q;
Chris@42 1113 }
Chris@42 1114 {
Chris@42 1115 E T2w, T2x, T4D, T4G;
Chris@42 1116 T2w = FNMS(KP980785280, T2s, KP195090322 * T2p);
Chris@42 1117 T2x = FMA(KP195090322, T2g, KP980785280 * T2j);
Chris@42 1118 T2y = T2w - T2x;
Chris@42 1119 T3o = T2x + T2w;
Chris@42 1120 T4D = FNMS(KP555570233, T4C, KP831469612 * T4B);
Chris@42 1121 T4G = FNMS(KP555570233, T4F, KP831469612 * T4E);
Chris@42 1122 T4H = T4D - T4G;
Chris@42 1123 T5C = T4G + T4D;
Chris@42 1124 }
Chris@42 1125 }
/* Load R1[2], R1[18], R1[10], R1[26], R1[30], R1[14], R1[6], R1[22];
 * yields T1L, T3j, T4y, T5t, T1P, T3h, T4r, T5v. */
Chris@42 1126 {
Chris@42 1127 E T1x, T4p, T1A, T4o, T1G, T4m, T1J, T4l;
Chris@42 1128 {
Chris@42 1129 E T1t, T1z, T1w, T1y, T1u, T1v;
Chris@42 1130 T1t = R1[WS(rs, 2)];
Chris@42 1131 T1z = R1[WS(rs, 18)];
Chris@42 1132 T1u = R1[WS(rs, 10)];
Chris@42 1133 T1v = R1[WS(rs, 26)];
Chris@42 1134 T1w = KP707106781 * (T1u - T1v);
Chris@42 1135 T1y = KP707106781 * (T1u + T1v);
Chris@42 1136 T1x = T1t + T1w;
Chris@42 1137 T4p = T1t - T1w;
Chris@42 1138 T1A = T1y + T1z;
Chris@42 1139 T4o = T1z - T1y;
Chris@42 1140 }
Chris@42 1141 {
Chris@42 1142 E T1F, T1I, T1E, T1H, T1C, T1D;
Chris@42 1143 T1F = R1[WS(rs, 30)];
Chris@42 1144 T1I = R1[WS(rs, 14)];
Chris@42 1145 T1C = R1[WS(rs, 6)];
Chris@42 1146 T1D = R1[WS(rs, 22)];
Chris@42 1147 T1E = KP707106781 * (T1C - T1D);
Chris@42 1148 T1H = KP707106781 * (T1C + T1D);
Chris@42 1149 T1G = T1E - T1F;
Chris@42 1150 T4m = T1E + T1F;
Chris@42 1151 T1J = T1H + T1I;
Chris@42 1152 T4l = T1I - T1H;
Chris@42 1153 }
Chris@42 1154 {
Chris@42 1155 E T1B, T1K, T4w, T4x;
Chris@42 1156 T1B = FNMS(KP195090322, T1A, KP980785280 * T1x);
Chris@42 1157 T1K = FMA(KP980785280, T1G, KP195090322 * T1J);
Chris@42 1158 T1L = T1B + T1K;
Chris@42 1159 T3j = T1K - T1B;
Chris@42 1160 T4w = FMA(KP831469612, T4p, KP555570233 * T4o);
Chris@42 1161 T4x = FMA(KP831469612, T4m, KP555570233 * T4l);
Chris@42 1162 T4y = T4w + T4x;
Chris@42 1163 T5t = T4w - T4x;
Chris@42 1164 }
Chris@42 1165 {
Chris@42 1166 E T1N, T1O, T4n, T4q;
Chris@42 1167 T1N = FNMS(KP980785280, T1J, KP195090322 * T1G);
Chris@42 1168 T1O = FMA(KP195090322, T1x, KP980785280 * T1A);
Chris@42 1169 T1P = T1N - T1O;
Chris@42 1170 T3h = T1O + T1N;
Chris@42 1171 T4n = FNMS(KP555570233, T4m, KP831469612 * T4l);
Chris@42 1172 T4q = FNMS(KP555570233, T4p, KP831469612 * T4o);
Chris@42 1173 T4r = T4n - T4q;
Chris@42 1174 T5v = T4q + T4n;
Chris@42 1175 }
Chris@42 1176 }
/* Output stage 1: writes Cr and Ci at indices 0, 7, 8, 15, 16, 23, 24, 31. */
Chris@42 1177 {
Chris@42 1178 E Tx, T2N, T69, T6f, T1e, T6e, T2X, T30, T1Y, T2L, T2Q, T62, T2U, T31, T2H;
Chris@42 1179 E T2K, Tw, T63;
Chris@42 1180 Tw = Tm + Tv;
Chris@42 1181 Tx = Td + Tw;
Chris@42 1182 T2N = Td - Tw;
Chris@42 1183 T63 = T35 - T34;
Chris@42 1184 T69 = T63 - T68;
Chris@42 1185 T6f = T63 + T68;
Chris@42 1186 {
Chris@42 1187 E TS, T1d, T2V, T2W;
Chris@42 1188 TS = FNMS(KP098017140, TR, KP995184726 * TK);
Chris@42 1189 T1d = FMA(KP995184726, T15, KP098017140 * T1c);
Chris@42 1190 T1e = TS + T1d;
Chris@42 1191 T6e = T1d - TS;
Chris@42 1192 T2V = T2b - T2u;
Chris@42 1193 T2W = T2y + T2F;
Chris@42 1194 T2X = FNMS(KP671558954, T2W, KP740951125 * T2V);
Chris@42 1195 T30 = FMA(KP671558954, T2V, KP740951125 * T2W);
Chris@42 1196 }
Chris@42 1197 {
Chris@42 1198 E T1M, T1X, T2O, T2P;
Chris@42 1199 T1M = T1s + T1L;
Chris@42 1200 T1X = T1P - T1W;
Chris@42 1201 T1Y = FMA(KP998795456, T1M, KP049067674 * T1X);
Chris@42 1202 T2L = FNMS(KP049067674, T1M, KP998795456 * T1X);
Chris@42 1203 T2O = FMA(KP098017140, TK, KP995184726 * TR);
Chris@42 1204 T2P = FNMS(KP995184726, T1c, KP098017140 * T15);
Chris@42 1205 T2Q = T2O + T2P;
Chris@42 1206 T62 = T2P - T2O;
Chris@42 1207 }
Chris@42 1208 {
Chris@42 1209 E T2S, T2T, T2v, T2G;
Chris@42 1210 T2S = T1s - T1L;
Chris@42 1211 T2T = T1P + T1W;
Chris@42 1212 T2U = FMA(KP740951125, T2S, KP671558954 * T2T);
Chris@42 1213 T31 = FNMS(KP671558954, T2S, KP740951125 * T2T);
Chris@42 1214 T2v = T2b + T2u;
Chris@42 1215 T2G = T2y - T2F;
Chris@42 1216 T2H = FNMS(KP049067674, T2G, KP998795456 * T2v);
Chris@42 1217 T2K = FMA(KP049067674, T2v, KP998795456 * T2G);
Chris@42 1218 }
Chris@42 1219 {
Chris@42 1220 E T1f, T2I, T6b, T6c;
Chris@42 1221 T1f = Tx + T1e;
Chris@42 1222 T2I = T1Y + T2H;
Chris@42 1223 Cr[WS(csr, 31)] = T1f - T2I;
Chris@42 1224 Cr[0] = T1f + T2I;
Chris@42 1225 T6b = T2L + T2K;
Chris@42 1226 T6c = T62 + T69;
Chris@42 1227 Ci[WS(csi, 31)] = T6b - T6c;
Chris@42 1228 Ci[0] = T6b + T6c;
Chris@42 1229 }
Chris@42 1230 {
Chris@42 1231 E T2J, T2M, T61, T6a;
Chris@42 1232 T2J = Tx - T1e;
Chris@42 1233 T2M = T2K - T2L;
Chris@42 1234 Cr[WS(csr, 16)] = T2J - T2M;
Chris@42 1235 Cr[WS(csr, 15)] = T2J + T2M;
Chris@42 1236 T61 = T2H - T1Y;
Chris@42 1237 T6a = T62 - T69;
Chris@42 1238 Ci[WS(csi, 16)] = T61 - T6a;
Chris@42 1239 Ci[WS(csi, 15)] = T61 + T6a;
Chris@42 1240 }
Chris@42 1241 {
Chris@42 1242 E T2R, T2Y, T6h, T6i;
Chris@42 1243 T2R = T2N + T2Q;
Chris@42 1244 T2Y = T2U + T2X;
Chris@42 1245 Cr[WS(csr, 24)] = T2R - T2Y;
Chris@42 1246 Cr[WS(csr, 7)] = T2R + T2Y;
Chris@42 1247 T6h = T31 + T30;
Chris@42 1248 T6i = T6e + T6f;
Chris@42 1249 Ci[WS(csi, 24)] = T6h - T6i;
Chris@42 1250 Ci[WS(csi, 7)] = T6h + T6i;
Chris@42 1251 }
Chris@42 1252 {
Chris@42 1253 E T2Z, T32, T6d, T6g;
Chris@42 1254 T2Z = T2N - T2Q;
Chris@42 1255 T32 = T30 - T31;
Chris@42 1256 Cr[WS(csr, 23)] = T2Z - T32;
Chris@42 1257 Cr[WS(csr, 8)] = T2Z + T32;
Chris@42 1258 T6d = T2X - T2U;
Chris@42 1259 T6g = T6e - T6f;
Chris@42 1260 Ci[WS(csi, 23)] = T6d - T6g;
Chris@42 1261 Ci[WS(csi, 8)] = T6d + T6g;
Chris@42 1262 }
Chris@42 1263 }
/* Output stage 2: writes Cr and Ci at indices 1, 6, 9, 14, 17, 22, 25, 30. */
Chris@42 1264 {
Chris@42 1265 E T5j, T5L, T6R, T6X, T5q, T6W, T5V, T5Y, T5y, T5J, T5O, T6O, T5S, T5Z, T5F;
Chris@42 1266 E T5I, T5i, T6P;
Chris@42 1267 T5i = T5g - T5h;
Chris@42 1268 T5j = T5f - T5i;
Chris@42 1269 T5L = T5f + T5i;
Chris@42 1270 T6P = T3Z + T3W;
Chris@42 1271 T6R = T6P - T6Q;
Chris@42 1272 T6X = T6P + T6Q;
Chris@42 1273 {
Chris@42 1274 E T5m, T5p, T5T, T5U;
Chris@42 1275 T5m = FMA(KP290284677, T5k, KP956940335 * T5l);
Chris@42 1276 T5p = FNMS(KP290284677, T5o, KP956940335 * T5n);
Chris@42 1277 T5q = T5m - T5p;
Chris@42 1278 T6W = T5p + T5m;
Chris@42 1279 T5T = T5z + T5A;
Chris@42 1280 T5U = T5C + T5D;
Chris@42 1281 T5V = FNMS(KP146730474, T5U, KP989176509 * T5T);
Chris@42 1282 T5Y = FMA(KP146730474, T5T, KP989176509 * T5U);
Chris@42 1283 }
Chris@42 1284 {
Chris@42 1285 E T5u, T5x, T5M, T5N;
Chris@42 1286 T5u = T5s - T5t;
Chris@42 1287 T5x = T5v - T5w;
Chris@42 1288 T5y = FMA(KP803207531, T5u, KP595699304 * T5x);
Chris@42 1289 T5J = FNMS(KP595699304, T5u, KP803207531 * T5x);
Chris@42 1290 T5M = FMA(KP956940335, T5o, KP290284677 * T5n);
Chris@42 1291 T5N = FNMS(KP290284677, T5l, KP956940335 * T5k);
Chris@42 1292 T5O = T5M + T5N;
Chris@42 1293 T6O = T5N - T5M;
Chris@42 1294 }
Chris@42 1295 {
Chris@42 1296 E T5Q, T5R, T5B, T5E;
Chris@42 1297 T5Q = T5s + T5t;
Chris@42 1298 T5R = T5v + T5w;
Chris@42 1299 T5S = FMA(KP989176509, T5Q, KP146730474 * T5R);
Chris@42 1300 T5Z = FNMS(KP146730474, T5Q, KP989176509 * T5R);
Chris@42 1301 T5B = T5z - T5A;
Chris@42 1302 T5E = T5C - T5D;
Chris@42 1303 T5F = FNMS(KP595699304, T5E, KP803207531 * T5B);
Chris@42 1304 T5I = FMA(KP595699304, T5B, KP803207531 * T5E);
Chris@42 1305 }
Chris@42 1306 {
Chris@42 1307 E T5r, T5G, T6T, T6U;
Chris@42 1308 T5r = T5j + T5q;
Chris@42 1309 T5G = T5y + T5F;
Chris@42 1310 Cr[WS(csr, 25)] = T5r - T5G;
Chris@42 1311 Cr[WS(csr, 6)] = T5r + T5G;
Chris@42 1312 T6T = T5J + T5I;
Chris@42 1313 T6U = T6O + T6R;
Chris@42 1314 Ci[WS(csi, 25)] = T6T - T6U;
Chris@42 1315 Ci[WS(csi, 6)] = T6T + T6U;
Chris@42 1316 }
Chris@42 1317 {
Chris@42 1318 E T5H, T5K, T6N, T6S;
Chris@42 1319 T5H = T5j - T5q;
Chris@42 1320 T5K = T5I - T5J;
Chris@42 1321 Cr[WS(csr, 22)] = T5H - T5K;
Chris@42 1322 Cr[WS(csr, 9)] = T5H + T5K;
Chris@42 1323 T6N = T5F - T5y;
Chris@42 1324 T6S = T6O - T6R;
Chris@42 1325 Ci[WS(csi, 22)] = T6N - T6S;
Chris@42 1326 Ci[WS(csi, 9)] = T6N + T6S;
Chris@42 1327 }
Chris@42 1328 {
Chris@42 1329 E T5P, T5W, T6Z, T70;
Chris@42 1330 T5P = T5L + T5O;
Chris@42 1331 T5W = T5S + T5V;
Chris@42 1332 Cr[WS(csr, 30)] = T5P - T5W;
Chris@42 1333 Cr[WS(csr, 1)] = T5P + T5W;
Chris@42 1334 T6Z = T5Z + T5Y;
Chris@42 1335 T70 = T6W + T6X;
Chris@42 1336 Ci[WS(csi, 30)] = T6Z - T70;
Chris@42 1337 Ci[WS(csi, 1)] = T6Z + T70;
Chris@42 1338 }
Chris@42 1339 {
Chris@42 1340 E T5X, T60, T6V, T6Y;
Chris@42 1341 T5X = T5L - T5O;
Chris@42 1342 T60 = T5Y - T5Z;
Chris@42 1343 Cr[WS(csr, 17)] = T5X - T60;
Chris@42 1344 Cr[WS(csr, 14)] = T5X + T60;
Chris@42 1345 T6V = T5V - T5S;
Chris@42 1346 T6Y = T6W - T6X;
Chris@42 1347 Ci[WS(csi, 17)] = T6V - T6Y;
Chris@42 1348 Ci[WS(csi, 14)] = T6V + T6Y;
Chris@42 1349 }
Chris@42 1350 }
/* Output stage 3: writes Cr and Ci at indices 3, 4, 11, 12, 19, 20, 27, 28. */
Chris@42 1351 {
Chris@42 1352 E T37, T3z, T6n, T6t, T3e, T6s, T3J, T3M, T3m, T3x, T3C, T6k, T3G, T3N, T3t;
Chris@42 1353 E T3w, T36, T6l;
Chris@42 1354 T36 = T34 + T35;
Chris@42 1355 T37 = T33 - T36;
Chris@42 1356 T3z = T33 + T36;
Chris@42 1357 T6l = Tv - Tm;
Chris@42 1358 T6n = T6l - T6m;
Chris@42 1359 T6t = T6l + T6m;
Chris@42 1360 {
Chris@42 1361 E T3a, T3d, T3H, T3I;
Chris@42 1362 T3a = FMA(KP634393284, T38, KP773010453 * T39);
Chris@42 1363 T3d = FNMS(KP634393284, T3c, KP773010453 * T3b);
Chris@42 1364 T3e = T3a - T3d;
Chris@42 1365 T6s = T3d + T3a;
Chris@42 1366 T3H = T3n + T3o;
Chris@42 1367 T3I = T3q + T3r;
Chris@42 1368 T3J = FNMS(KP336889853, T3I, KP941544065 * T3H);
Chris@42 1369 T3M = FMA(KP336889853, T3H, KP941544065 * T3I);
Chris@42 1370 }
Chris@42 1371 {
Chris@42 1372 E T3i, T3l, T3A, T3B;
Chris@42 1373 T3i = T3g - T3h;
Chris@42 1374 T3l = T3j - T3k;
Chris@42 1375 T3m = FMA(KP903989293, T3i, KP427555093 * T3l);
Chris@42 1376 T3x = FNMS(KP427555093, T3i, KP903989293 * T3l);
Chris@42 1377 T3A = FMA(KP773010453, T3c, KP634393284 * T3b);
Chris@42 1378 T3B = FNMS(KP634393284, T39, KP773010453 * T38);
Chris@42 1379 T3C = T3A + T3B;
Chris@42 1380 T6k = T3B - T3A;
Chris@42 1381 }
Chris@42 1382 {
Chris@42 1383 E T3E, T3F, T3p, T3s;
Chris@42 1384 T3E = T3g + T3h;
Chris@42 1385 T3F = T3j + T3k;
Chris@42 1386 T3G = FMA(KP941544065, T3E, KP336889853 * T3F);
Chris@42 1387 T3N = FNMS(KP336889853, T3E, KP941544065 * T3F);
Chris@42 1388 T3p = T3n - T3o;
Chris@42 1389 T3s = T3q - T3r;
Chris@42 1390 T3t = FNMS(KP427555093, T3s, KP903989293 * T3p);
Chris@42 1391 T3w = FMA(KP427555093, T3p, KP903989293 * T3s);
Chris@42 1392 }
Chris@42 1393 {
Chris@42 1394 E T3f, T3u, T6p, T6q;
Chris@42 1395 T3f = T37 + T3e;
Chris@42 1396 T3u = T3m + T3t;
Chris@42 1397 Cr[WS(csr, 27)] = T3f - T3u;
Chris@42 1398 Cr[WS(csr, 4)] = T3f + T3u;
Chris@42 1399 T6p = T3x + T3w;
Chris@42 1400 T6q = T6k + T6n;
Chris@42 1401 Ci[WS(csi, 27)] = T6p - T6q;
Chris@42 1402 Ci[WS(csi, 4)] = T6p + T6q;
Chris@42 1403 }
Chris@42 1404 {
Chris@42 1405 E T3v, T3y, T6j, T6o;
Chris@42 1406 T3v = T37 - T3e;
Chris@42 1407 T3y = T3w - T3x;
Chris@42 1408 Cr[WS(csr, 20)] = T3v - T3y;
Chris@42 1409 Cr[WS(csr, 11)] = T3v + T3y;
Chris@42 1410 T6j = T3t - T3m;
Chris@42 1411 T6o = T6k - T6n;
Chris@42 1412 Ci[WS(csi, 20)] = T6j - T6o;
Chris@42 1413 Ci[WS(csi, 11)] = T6j + T6o;
Chris@42 1414 }
Chris@42 1415 {
Chris@42 1416 E T3D, T3K, T6v, T6w;
Chris@42 1417 T3D = T3z + T3C;
Chris@42 1418 T3K = T3G + T3J;
Chris@42 1419 Cr[WS(csr, 28)] = T3D - T3K;
Chris@42 1420 Cr[WS(csr, 3)] = T3D + T3K;
Chris@42 1421 T6v = T3N + T3M;
Chris@42 1422 T6w = T6s + T6t;
Chris@42 1423 Ci[WS(csi, 28)] = T6v - T6w;
Chris@42 1424 Ci[WS(csi, 3)] = T6v + T6w;
Chris@42 1425 }
Chris@42 1426 {
Chris@42 1427 E T3L, T3O, T6r, T6u;
Chris@42 1428 T3L = T3z - T3C;
Chris@42 1429 T3O = T3M - T3N;
Chris@42 1430 Cr[WS(csr, 19)] = T3L - T3O;
Chris@42 1431 Cr[WS(csr, 12)] = T3L + T3O;
Chris@42 1432 T6r = T3J - T3G;
Chris@42 1433 T6u = T6s - T6t;
Chris@42 1434 Ci[WS(csi, 19)] = T6r - T6u;
Chris@42 1435 Ci[WS(csi, 12)] = T6r + T6u;
Chris@42 1436 }
Chris@42 1437 }
/* Output stage 4: writes Cr and Ci at indices 2, 5, 10, 13, 18, 21, 26, 29. */
Chris@42 1438 {
Chris@42 1439 E T41, T4Z, T6D, T6J, T4g, T6I, T59, T5d, T4A, T4X, T52, T6y, T56, T5c, T4T;
Chris@42 1440 E T4W, T40, T6C;
Chris@42 1441 T40 = T3W - T3Z;
Chris@42 1442 T41 = T3T + T40;
Chris@42 1443 T4Z = T3T - T40;
Chris@42 1444 T6C = T5g + T5h;
Chris@42 1445 T6D = T6B - T6C;
Chris@42 1446 T6J = T6C + T6B;
Chris@42 1447 {
Chris@42 1448 E T48, T4f, T57, T58;
Chris@42 1449 T48 = FMA(KP881921264, T44, KP471396736 * T47);
Chris@42 1450 T4f = FMA(KP881921264, T4b, KP471396736 * T4e);
Chris@42 1451 T4g = T48 - T4f;
Chris@42 1452 T6I = T48 + T4f;
Chris@42 1453 T57 = T4K + T4H;
Chris@42 1454 T58 = T4R + T4O;
Chris@42 1455 T59 = FMA(KP514102744, T57, KP857728610 * T58);
Chris@42 1456 T5d = FNMS(KP857728610, T57, KP514102744 * T58);
Chris@42 1457 }
Chris@42 1458 {
Chris@42 1459 E T4s, T4z, T50, T51;
Chris@42 1460 T4s = T4k + T4r;
Chris@42 1461 T4z = T4v - T4y;
Chris@42 1462 T4A = FMA(KP970031253, T4s, KP242980179 * T4z);
Chris@42 1463 T4X = FNMS(KP242980179, T4s, KP970031253 * T4z);
Chris@42 1464 T50 = FNMS(KP471396736, T4b, KP881921264 * T4e);
Chris@42 1465 T51 = FNMS(KP471396736, T44, KP881921264 * T47);
Chris@42 1466 T52 = T50 - T51;
Chris@42 1467 T6y = T51 + T50;
Chris@42 1468 }
Chris@42 1469 {
Chris@42 1470 E T54, T55, T4L, T4S;
Chris@42 1471 T54 = T4k - T4r;
Chris@42 1472 T55 = T4y + T4v;
Chris@42 1473 T56 = FMA(KP514102744, T54, KP857728610 * T55);
Chris@42 1474 T5c = FNMS(KP514102744, T55, KP857728610 * T54);
Chris@42 1475 T4L = T4H - T4K;
Chris@42 1476 T4S = T4O - T4R;
Chris@42 1477 T4T = FNMS(KP242980179, T4S, KP970031253 * T4L);
Chris@42 1478 T4W = FMA(KP242980179, T4L, KP970031253 * T4S);
Chris@42 1479 }
Chris@42 1480 {
Chris@42 1481 E T4h, T4U, T6F, T6G;
Chris@42 1482 T4h = T41 + T4g;
Chris@42 1483 T4U = T4A + T4T;
Chris@42 1484 Cr[WS(csr, 29)] = T4h - T4U;
Chris@42 1485 Cr[WS(csr, 2)] = T4h + T4U;
Chris@42 1486 T6F = T4X + T4W;
Chris@42 1487 T6G = T6y + T6D;
Chris@42 1488 Ci[WS(csi, 29)] = T6F - T6G;
Chris@42 1489 Ci[WS(csi, 2)] = T6F + T6G;
Chris@42 1490 }
Chris@42 1491 {
Chris@42 1492 E T4V, T4Y, T6x, T6E;
Chris@42 1493 T4V = T41 - T4g;
Chris@42 1494 T4Y = T4W - T4X;
Chris@42 1495 Cr[WS(csr, 18)] = T4V - T4Y;
Chris@42 1496 Cr[WS(csr, 13)] = T4V + T4Y;
Chris@42 1497 T6x = T4T - T4A;
Chris@42 1498 T6E = T6y - T6D;
Chris@42 1499 Ci[WS(csi, 18)] = T6x - T6E;
Chris@42 1500 Ci[WS(csi, 13)] = T6x + T6E;
Chris@42 1501 }
Chris@42 1502 {
Chris@42 1503 E T53, T5a, T6L, T6M;
Chris@42 1504 T53 = T4Z - T52;
Chris@42 1505 T5a = T56 - T59;
Chris@42 1506 Cr[WS(csr, 21)] = T53 - T5a;
Chris@42 1507 Cr[WS(csr, 10)] = T53 + T5a;
Chris@42 1508 T6L = T5d - T5c;
Chris@42 1509 T6M = T6J - T6I;
Chris@42 1510 Ci[WS(csi, 21)] = T6L - T6M;
Chris@42 1511 Ci[WS(csi, 10)] = T6L + T6M;
Chris@42 1512 }
Chris@42 1513 {
Chris@42 1514 E T5b, T5e, T6H, T6K;
Chris@42 1515 T5b = T4Z + T52;
Chris@42 1516 T5e = T5c + T5d;
Chris@42 1517 Cr[WS(csr, 26)] = T5b - T5e;
Chris@42 1518 Cr[WS(csr, 5)] = T5b + T5e;
Chris@42 1519 T6H = T56 + T59;
Chris@42 1520 T6K = T6I + T6J;
Chris@42 1521 Ci[WS(csi, 5)] = -(T6H + T6K);
Chris@42 1522 Ci[WS(csi, 26)] = T6K - T6H;
Chris@42 1523 }
Chris@42 1524 }
Chris@42 1525 }
Chris@42 1526 }
Chris@42 1527 }
Chris@42 1528
/* Descriptor handed to the planner for the non-FMA build of this codelet:
 * the leading 64 matches the generator's "-n 64", the string is the codelet
 * name, and the first three numbers of the inner tuple equal the operation
 * counts quoted in this variant's header comment (342 additions,
 * 114 multiplications, 92 fused multiply/add).
 * NOTE(review): the meaning of the fourth tuple entry and of GENUS is not
 * visible in this file -- see the kr2c_desc declaration and r2cfII.h. */
Chris@42 1529 static const kr2c_desc desc = { 64, "r2cfII_64", {342, 114, 92, 0}, &GENUS };
Chris@42 1530
/* Registration entry point (non-FMA build): passes the r2cfII_64 kernel and
 * its descriptor to kr2c_register for planner p.
 * NOTE(review): X(...) is a macro defined outside this file; presumably it
 * decorates the identifier with the library's name prefix -- confirm. */
Chris@42 1531 void X(codelet_r2cfII_64) (planner *p) {
Chris@42 1532 X(kr2c_register) (p, r2cfII_64, &desc);
Chris@42 1533 }
Chris@42 1534
Chris@42 1535 #endif /* HAVE_FMA */