annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cfII_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:47:29 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 174 FP additions, 128 FP multiplications,
Chris@42 32 * (or, 46 additions, 0 multiplications, 128 fused multiply/add),
Chris@42 33 * 96 stack variables, 15 constants, and 64 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cfII.h"
Chris@42 36
/*
 * r2cfII_32 -- variant compiled when HAVE_FMA is defined.
 *
 * Machine-generated size-32 kernel (generator flags: -n 32 -dft-II, see the
 * "Generated by" comment preceding this function).  On every pass of the
 * outer loop it reads 16 values from R0 and 16 values from R1 (element k is
 * addressed as WS(rs, k), k = 0..15) and stores 16 values to Cr (via
 * WS(csr, k)) and 16 values to Ci (via WS(csi, k)).
 *
 * Parameters:
 *   R0, R1   input arrays, indexed through stride rs
 *   Cr, Ci   output arrays, indexed through strides csr and csi
 *   rs       stride used for both input arrays
 *   csr, csi strides used for Cr and Ci respectively
 *   v        number of loop iterations (nothing is done when v <= 0)
 *   ivs      amount added to R0 and R1 after each iteration
 *   ovs      amount added to Cr and Ci after each iteration
 *
 * NOTE(review): R0/R1 are presumably the even/odd input samples and Cr/Ci
 * the real/imaginary output parts -- confirm against the FFTW rdft codelet
 * conventions.  FMA/FMS/FNMS, DK, WS, E and MAKE_VOLATILE_STRIDE come from
 * the included project headers and are not defined in this file.
 */
Chris@42 37 static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Named constants; each KPnnnnnnnnn name encodes the leading decimal digits of its value. */
Chris@42 39 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 40 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@42 41 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 42 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@42 43 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 44 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 45 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@42 46 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 47 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 48 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@42 49 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 50 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 51 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 52 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 53 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 54 {
Chris@42 55 INT i;
/* One transform per iteration: count down from v, stepping the input pointers by ivs and the output pointers by ovs. */
Chris@42 56 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* These eight temporaries outlive the nested scopes below; they feed the final output group (indices 3, 4, 11, 12). */
Chris@42 57 E T23, T1S, T21, T1L, T2z, T2x, T1Z, T22;
Chris@42 58 {
Chris@42 59 E T2n, T2B, T1z, T5, T1C, T2C, T2o, Tc, T27, T1J, T1l, Tm, T26, T1G, T1k;
Chris@42 60 E Tv, T1s, T1c, T2e, T1Y, T1r, T15, T2d, T1V, TP, TF, T1M, TC, T1P, TN;
Chris@42 61 E TO, TI;
Chris@42 62 {
Chris@42 63 E T1A, T8, Te, Tj, Tf, T1B, Tb, Tg;
Chris@42 64 {
Chris@42 65 E T1, T2l, T2, T3, T9, Ta;
/* Loads from R0 are interleaved with the first arithmetic stages. */
Chris@42 66 T1 = R0[0];
Chris@42 67 T2l = R0[WS(rs, 8)];
Chris@42 68 T2 = R0[WS(rs, 4)];
Chris@42 69 T3 = R0[WS(rs, 12)];
Chris@42 70 {
Chris@42 71 E T6, T7, T2m, T4;
Chris@42 72 T6 = R0[WS(rs, 10)];
Chris@42 73 T7 = R0[WS(rs, 2)];
Chris@42 74 T9 = R0[WS(rs, 6)];
Chris@42 75 T2m = T2 + T3;
Chris@42 76 T4 = T2 - T3;
Chris@42 77 T1A = FNMS(KP414213562, T6, T7);
Chris@42 78 T8 = FMA(KP414213562, T7, T6);
Chris@42 79 T2n = FMA(KP707106781, T2m, T2l);
Chris@42 80 T2B = FNMS(KP707106781, T2m, T2l);
Chris@42 81 T1z = FMA(KP707106781, T4, T1);
Chris@42 82 T5 = FNMS(KP707106781, T4, T1);
Chris@42 83 Ta = R0[WS(rs, 14)];
Chris@42 84 }
Chris@42 85 Te = R0[WS(rs, 7)];
Chris@42 86 Tj = R0[WS(rs, 15)];
Chris@42 87 Tf = R0[WS(rs, 3)];
Chris@42 88 T1B = FMS(KP414213562, T9, Ta);
Chris@42 89 Tb = FMA(KP414213562, Ta, T9);
Chris@42 90 Tg = R0[WS(rs, 11)];
Chris@42 91 }
Chris@42 92 {
Chris@42 93 E Tn, Ts, To, T1I, Tl, T1H, Ti, Tp, Tk, Th, T1T, T1U;
Chris@42 94 Tn = R0[WS(rs, 9)];
Chris@42 95 T1C = T1A + T1B;
Chris@42 96 T2C = T1B - T1A;
Chris@42 97 T2o = T8 + Tb;
Chris@42 98 Tc = T8 - Tb;
Chris@42 99 Tk = Tg - Tf;
Chris@42 100 Th = Tf + Tg;
Chris@42 101 Ts = R0[WS(rs, 1)];
Chris@42 102 To = R0[WS(rs, 5)];
Chris@42 103 T1I = FMA(KP707106781, Tk, Tj);
Chris@42 104 Tl = FNMS(KP707106781, Tk, Tj);
Chris@42 105 T1H = FMA(KP707106781, Th, Te);
Chris@42 106 Ti = FNMS(KP707106781, Th, Te);
Chris@42 107 Tp = R0[WS(rs, 13)];
Chris@42 108 {
Chris@42 109 E TT, T16, TY, T17, TW, TZ, T11, T12, Tt, Tq;
/* From here on the R1 inputs are loaded as well. */
Chris@42 110 TT = R1[WS(rs, 15)];
Chris@42 111 T27 = FNMS(KP198912367, T1H, T1I);
Chris@42 112 T1J = FMA(KP198912367, T1I, T1H);
Chris@42 113 T1l = FMA(KP668178637, Ti, Tl);
Chris@42 114 Tm = FNMS(KP668178637, Tl, Ti);
Chris@42 115 Tt = To - Tp;
Chris@42 116 Tq = To + Tp;
Chris@42 117 T16 = R1[WS(rs, 7)];
Chris@42 118 {
Chris@42 119 E TU, T1F, Tu, T1E, Tr, TV;
Chris@42 120 TU = R1[WS(rs, 3)];
Chris@42 121 T1F = FMA(KP707106781, Tt, Ts);
Chris@42 122 Tu = FNMS(KP707106781, Tt, Ts);
Chris@42 123 T1E = FMA(KP707106781, Tq, Tn);
Chris@42 124 Tr = FNMS(KP707106781, Tq, Tn);
Chris@42 125 TV = R1[WS(rs, 11)];
Chris@42 126 TY = R1[WS(rs, 9)];
Chris@42 127 T26 = FNMS(KP198912367, T1E, T1F);
Chris@42 128 T1G = FMA(KP198912367, T1F, T1E);
Chris@42 129 T1k = FMA(KP668178637, Tr, Tu);
Chris@42 130 Tv = FNMS(KP668178637, Tu, Tr);
Chris@42 131 T17 = TU + TV;
Chris@42 132 TW = TU - TV;
Chris@42 133 TZ = R1[WS(rs, 1)];
Chris@42 134 T11 = R1[WS(rs, 5)];
Chris@42 135 T12 = R1[WS(rs, 13)];
Chris@42 136 }
Chris@42 137 {
Chris@42 138 E TX, T1a, T10, T19, T13, T1W, T18, T1b, T14, T1X;
Chris@42 139 T1T = FMS(KP707106781, TW, TT);
Chris@42 140 TX = FMA(KP707106781, TW, TT);
Chris@42 141 T1a = FNMS(KP414213562, TY, TZ);
Chris@42 142 T10 = FMA(KP414213562, TZ, TY);
Chris@42 143 T19 = FMS(KP414213562, T11, T12);
Chris@42 144 T13 = FMA(KP414213562, T12, T11);
Chris@42 145 T1W = FMA(KP707106781, T17, T16);
Chris@42 146 T18 = FNMS(KP707106781, T17, T16);
Chris@42 147 T1b = T19 - T1a;
Chris@42 148 T1U = T1a + T19;
Chris@42 149 T14 = T10 - T13;
Chris@42 150 T1X = T10 + T13;
Chris@42 151 T1s = FMA(KP923879532, T1b, T18);
Chris@42 152 T1c = FNMS(KP923879532, T1b, T18);
Chris@42 153 T2e = FMA(KP923879532, T1X, T1W);
Chris@42 154 T1Y = FNMS(KP923879532, T1X, T1W);
Chris@42 155 T1r = FNMS(KP923879532, T14, TX);
Chris@42 156 T15 = FMA(KP923879532, T14, TX);
Chris@42 157 }
Chris@42 158 }
Chris@42 159 {
Chris@42 160 E Ty, TL, TG, TM, TB, TH;
Chris@42 161 Ty = R1[0];
Chris@42 162 TL = R1[WS(rs, 8)];
Chris@42 163 {
Chris@42 164 E Tz, TA, TD, TE;
Chris@42 165 Tz = R1[WS(rs, 4)];
Chris@42 166 T2d = FMA(KP923879532, T1U, T1T);
Chris@42 167 T1V = FNMS(KP923879532, T1U, T1T);
Chris@42 168 TA = R1[WS(rs, 12)];
Chris@42 169 TD = R1[WS(rs, 10)];
Chris@42 170 TE = R1[WS(rs, 2)];
Chris@42 171 TG = R1[WS(rs, 6)];
Chris@42 172 TM = Tz + TA;
Chris@42 173 TB = Tz - TA;
Chris@42 174 TP = FNMS(KP414213562, TD, TE);
Chris@42 175 TF = FMA(KP414213562, TE, TD);
Chris@42 176 TH = R1[WS(rs, 14)];
Chris@42 177 }
Chris@42 178 T1M = FMA(KP707106781, TB, Ty);
Chris@42 179 TC = FNMS(KP707106781, TB, Ty);
Chris@42 180 T1P = FMA(KP707106781, TM, TL);
Chris@42 181 TN = FNMS(KP707106781, TM, TL);
Chris@42 182 TO = FMS(KP414213562, TG, TH);
Chris@42 183 TI = FMA(KP414213562, TH, TG);
Chris@42 184 }
Chris@42 185 }
Chris@42 186 }
/* All 32 inputs have been read at this point; the remaining scopes combine the partial results and store the outputs. */
Chris@42 187 {
Chris@42 188 E T1j, T1O, T1p, T1R, T1o, T2E, T2D, T1m, T1D, T2w, T2v, T1K, T2i, T2c, T2h;
Chris@42 189 E T29, T2t, T2r, T2f, T2j;
Chris@42 190 {
Chris@42 191 E T2a, T2b, T1g, TS, T1f, Tx, T2N, T2L, T1d, T1h;
Chris@42 192 {
Chris@42 193 E Td, TR, TK, Tw, T2J, T2K;
Chris@42 194 T1j = FMA(KP923879532, Tc, T5);
Chris@42 195 Td = FNMS(KP923879532, Tc, T5);
Chris@42 196 {
Chris@42 197 E T1N, TQ, T1Q, TJ;
Chris@42 198 T1N = TP + TO;
Chris@42 199 TQ = TO - TP;
Chris@42 200 T1Q = TF + TI;
Chris@42 201 TJ = TF - TI;
Chris@42 202 T2a = FMA(KP923879532, T1N, T1M);
Chris@42 203 T1O = FNMS(KP923879532, T1N, T1M);
Chris@42 204 T1p = FMA(KP923879532, TQ, TN);
Chris@42 205 TR = FNMS(KP923879532, TQ, TN);
Chris@42 206 T2b = FMA(KP923879532, T1Q, T1P);
Chris@42 207 T1R = FNMS(KP923879532, T1Q, T1P);
Chris@42 208 T1o = FMA(KP923879532, TJ, TC);
Chris@42 209 TK = FNMS(KP923879532, TJ, TC);
Chris@42 210 Tw = Tm - Tv;
Chris@42 211 T2E = Tv + Tm;
Chris@42 212 }
Chris@42 213 T2D = FMA(KP923879532, T2C, T2B);
Chris@42 214 T2J = FNMS(KP923879532, T2C, T2B);
Chris@42 215 T2K = T1k + T1l;
Chris@42 216 T1m = T1k - T1l;
Chris@42 217 T1g = FMA(KP534511135, TK, TR);
Chris@42 218 TS = FNMS(KP534511135, TR, TK);
Chris@42 219 T1f = FNMS(KP831469612, Tw, Td);
Chris@42 220 Tx = FMA(KP831469612, Tw, Td);
Chris@42 221 T2N = FNMS(KP831469612, T2K, T2J);
Chris@42 222 T2L = FMA(KP831469612, T2K, T2J);
Chris@42 223 T1d = FNMS(KP534511135, T1c, T15);
Chris@42 224 T1h = FMA(KP534511135, T15, T1c);
Chris@42 225 }
Chris@42 226 {
Chris@42 227 E T25, T28, T2p, T2q;
Chris@42 228 T1D = FNMS(KP923879532, T1C, T1z);
Chris@42 229 T25 = FMA(KP923879532, T1C, T1z);
Chris@42 230 {
Chris@42 231 E T2O, T1e, T2M, T1i;
Chris@42 232 T2O = TS + T1d;
Chris@42 233 T1e = TS - T1d;
Chris@42 234 T2M = T1g + T1h;
Chris@42 235 T1i = T1g - T1h;
/* Output group: Cr and Ci at indices 2, 5, 10 and 13. */
Chris@42 236 Ci[WS(csi, 5)] = FNMS(KP881921264, T2O, T2N);
Chris@42 237 Ci[WS(csi, 10)] = -(FMA(KP881921264, T2O, T2N));
Chris@42 238 Cr[WS(csr, 2)] = FMA(KP881921264, T1e, Tx);
Chris@42 239 Cr[WS(csr, 13)] = FNMS(KP881921264, T1e, Tx);
Chris@42 240 Ci[WS(csi, 2)] = -(FMA(KP881921264, T2M, T2L));
Chris@42 241 Ci[WS(csi, 13)] = FNMS(KP881921264, T2M, T2L);
Chris@42 242 Cr[WS(csr, 5)] = FMA(KP881921264, T1i, T1f);
Chris@42 243 Cr[WS(csr, 10)] = FNMS(KP881921264, T1i, T1f);
Chris@42 244 T28 = T26 - T27;
Chris@42 245 T2w = T26 + T27;
Chris@42 246 }
Chris@42 247 T2v = FNMS(KP923879532, T2o, T2n);
Chris@42 248 T2p = FMA(KP923879532, T2o, T2n);
Chris@42 249 T2q = T1G + T1J;
Chris@42 250 T1K = T1G - T1J;
Chris@42 251 T2i = FMA(KP098491403, T2a, T2b);
Chris@42 252 T2c = FNMS(KP098491403, T2b, T2a);
Chris@42 253 T2h = FNMS(KP980785280, T28, T25);
Chris@42 254 T29 = FMA(KP980785280, T28, T25);
Chris@42 255 T2t = FNMS(KP980785280, T2q, T2p);
Chris@42 256 T2r = FMA(KP980785280, T2q, T2p);
Chris@42 257 T2f = FMA(KP098491403, T2e, T2d);
Chris@42 258 T2j = FNMS(KP098491403, T2d, T2e);
Chris@42 259 }
Chris@42 260 }
Chris@42 261 {
Chris@42 262 E T1x, T1q, T1v, T1n, T2H, T2F, T1t, T1w;
Chris@42 263 {
Chris@42 264 E T2u, T2g, T2s, T2k;
Chris@42 265 T2u = T2f - T2c;
Chris@42 266 T2g = T2c + T2f;
Chris@42 267 T2s = T2i + T2j;
Chris@42 268 T2k = T2i - T2j;
/* Output group: Cr and Ci at indices 0, 7, 8 and 15. */
Chris@42 269 Ci[WS(csi, 7)] = FMA(KP995184726, T2u, T2t);
Chris@42 270 Ci[WS(csi, 8)] = FMS(KP995184726, T2u, T2t);
Chris@42 271 Cr[0] = FMA(KP995184726, T2g, T29);
Chris@42 272 Cr[WS(csr, 15)] = FNMS(KP995184726, T2g, T29);
Chris@42 273 Ci[0] = -(FMA(KP995184726, T2s, T2r));
Chris@42 274 Ci[WS(csi, 15)] = FNMS(KP995184726, T2s, T2r);
Chris@42 275 Cr[WS(csr, 7)] = FMA(KP995184726, T2k, T2h);
Chris@42 276 Cr[WS(csr, 8)] = FNMS(KP995184726, T2k, T2h);
Chris@42 277 }
Chris@42 278 T1x = FNMS(KP303346683, T1o, T1p);
Chris@42 279 T1q = FMA(KP303346683, T1p, T1o);
Chris@42 280 T1v = FNMS(KP831469612, T1m, T1j);
Chris@42 281 T1n = FMA(KP831469612, T1m, T1j);
Chris@42 282 T2H = FNMS(KP831469612, T2E, T2D);
Chris@42 283 T2F = FMA(KP831469612, T2E, T2D);
Chris@42 284 T1t = FMA(KP303346683, T1s, T1r);
Chris@42 285 T1w = FNMS(KP303346683, T1r, T1s);
Chris@42 286 {
Chris@42 287 E T2I, T1u, T2G, T1y;
Chris@42 288 T2I = T1q + T1t;
Chris@42 289 T1u = T1q - T1t;
Chris@42 290 T2G = T1x + T1w;
Chris@42 291 T1y = T1w - T1x;
/* Output group: Cr and Ci at indices 1, 6, 9 and 14. */
Chris@42 292 Ci[WS(csi, 6)] = -(FMA(KP956940335, T2I, T2H));
Chris@42 293 Ci[WS(csi, 9)] = FNMS(KP956940335, T2I, T2H);
Chris@42 294 Cr[WS(csr, 1)] = FMA(KP956940335, T1u, T1n);
Chris@42 295 Cr[WS(csr, 14)] = FNMS(KP956940335, T1u, T1n);
Chris@42 296 Ci[WS(csi, 1)] = FMA(KP956940335, T2G, T2F);
Chris@42 297 Ci[WS(csi, 14)] = FMS(KP956940335, T2G, T2F);
Chris@42 298 Cr[WS(csr, 6)] = FMA(KP956940335, T1y, T1v);
Chris@42 299 Cr[WS(csr, 9)] = FNMS(KP956940335, T1y, T1v);
Chris@42 300 }
/* Values for the last output group are computed here and stored after the enclosing scopes close. */
Chris@42 301 T23 = FNMS(KP820678790, T1O, T1R);
Chris@42 302 T1S = FMA(KP820678790, T1R, T1O);
Chris@42 303 T21 = FNMS(KP980785280, T1K, T1D);
Chris@42 304 T1L = FMA(KP980785280, T1K, T1D);
Chris@42 305 T2z = FMA(KP980785280, T2w, T2v);
Chris@42 306 T2x = FNMS(KP980785280, T2w, T2v);
Chris@42 307 T1Z = FNMS(KP820678790, T1Y, T1V);
Chris@42 308 T22 = FMA(KP820678790, T1V, T1Y);
Chris@42 309 }
Chris@42 310 }
Chris@42 311 }
Chris@42 312 {
Chris@42 313 E T20, T2A, T24, T2y;
Chris@42 314 T20 = T1S + T1Z;
Chris@42 315 T2A = T1Z - T1S;
Chris@42 316 T24 = T22 - T23;
Chris@42 317 T2y = T23 + T22;
/* Output group: Cr and Ci at indices 3, 4, 11 and 12. */
Chris@42 318 Ci[WS(csi, 4)] = FMS(KP773010453, T2A, T2z);
Chris@42 319 Ci[WS(csi, 11)] = FMA(KP773010453, T2A, T2z);
Chris@42 320 Cr[WS(csr, 3)] = FMA(KP773010453, T20, T1L);
Chris@42 321 Cr[WS(csr, 12)] = FNMS(KP773010453, T20, T1L);
Chris@42 322 Ci[WS(csi, 3)] = FMA(KP773010453, T2y, T2x);
Chris@42 323 Ci[WS(csi, 12)] = FMS(KP773010453, T2y, T2x);
Chris@42 324 Cr[WS(csr, 4)] = FMA(KP773010453, T24, T21);
Chris@42 325 Cr[WS(csr, 11)] = FNMS(KP773010453, T24, T21);
Chris@42 326 }
Chris@42 327 }
Chris@42 328 }
Chris@42 329 }
Chris@42 330
/*
 * Descriptor passed to the planner for the HAVE_FMA kernel: 32, the kernel
 * name, and an operation-count tuple.  The first three counts (46, 0, 128)
 * equal the additions / multiplications / fused multiply-add figures quoted
 * in the comment preceding the function.
 * NOTE(review): the meaning of the 32 (presumably the transform size), of the
 * fourth count and of GENUS comes from the project headers -- confirm there.
 */
Chris@42 331 static const kr2c_desc desc = { 32, "r2cfII_32", {46, 0, 128, 0}, &GENUS };
Chris@42 332
/*
 * Registration entry point (HAVE_FMA build): passes the r2cfII_32 kernel and
 * its descriptor to X(kr2c_register) together with the planner p.
 */
Chris@42 333 void X(codelet_r2cfII_32) (planner *p) {
Chris@42 334 X(kr2c_register) (p, r2cfII_32, &desc);
Chris@42 335 }
Chris@42 336
Chris@42 337 #else /* HAVE_FMA */
Chris@42 338
Chris@42 339 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include r2cfII.h */
Chris@42 340
Chris@42 341 /*
Chris@42 342 * This function contains 174 FP additions, 82 FP multiplications,
Chris@42 343 * (or, 138 additions, 46 multiplications, 36 fused multiply/add),
Chris@42 344 * 62 stack variables, 15 constants, and 64 memory accesses
Chris@42 345 */
Chris@42 346 #include "r2cfII.h"
Chris@42 347
/*
 * r2cfII_32 -- variant compiled when HAVE_FMA is not defined.
 *
 * Machine-generated size-32 kernel (generator flags: -n 32 -dft-II, see the
 * "Generated by" comment preceding this function).  On every pass of the
 * outer loop it reads 16 values from R0 and 16 values from R1 (element k is
 * addressed as WS(rs, k), k = 0..15) and stores 16 values to Cr (via
 * WS(csr, k)) and 16 values to Ci (via WS(csi, k)).
 *
 * Parameters:
 *   R0, R1   input arrays, indexed through stride rs
 *   Cr, Ci   output arrays, indexed through strides csr and csi
 *   rs       stride used for both input arrays
 *   csr, csi strides used for Cr and Ci respectively
 *   v        number of loop iterations (nothing is done when v <= 0)
 *   ivs      amount added to R0 and R1 after each iteration
 *   ovs      amount added to Cr and Ci after each iteration
 *
 * NOTE(review): R0/R1 are presumably the even/odd input samples and Cr/Ci
 * the real/imaginary output parts -- confirm against the FFTW rdft codelet
 * conventions.  FMA/FNMS, DK, WS, E and MAKE_VOLATILE_STRIDE come from the
 * included project headers and are not defined in this file.
 */
Chris@42 348 static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 349 {
/* Named constants; each KPnnnnnnnnn name encodes the leading decimal digits of its value. */
Chris@42 350 DK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@42 351 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 352 DK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@42 353 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 354 DK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@42 355 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 356 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 357 DK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@42 358 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 359 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 360 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 361 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 362 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 363 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 364 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 365 {
Chris@42 366 INT i;
/* One transform per iteration: count down from v, stepping the input pointers by ivs and the output pointers by ovs. */
Chris@42 367 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* Intermediate results produced by the six input stages below and consumed by the four output stages. */
Chris@42 368 E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G;
Chris@42 369 E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
Chris@42 370 E T1O, T2a;
/* Input stage: R0 elements 0, 4, 8, 12. */
Chris@42 371 {
Chris@42 372 E T1, T2p, T4, T2o, T2, T3;
Chris@42 373 T1 = R0[0];
Chris@42 374 T2p = R0[WS(rs, 8)];
Chris@42 375 T2 = R0[WS(rs, 4)];
Chris@42 376 T3 = R0[WS(rs, 12)];
Chris@42 377 T4 = KP707106781 * (T2 - T3);
Chris@42 378 T2o = KP707106781 * (T2 + T3);
Chris@42 379 T5 = T1 + T4;
Chris@42 380 T2D = T2p - T2o;
Chris@42 381 T1z = T1 - T4;
Chris@42 382 T2q = T2o + T2p;
Chris@42 383 }
/* Input stage: R0 elements 2, 6, 10, 14. */
Chris@42 384 {
Chris@42 385 E T8, T1A, Tb, T1B;
Chris@42 386 {
Chris@42 387 E T6, T7, T9, Ta;
Chris@42 388 T6 = R0[WS(rs, 2)];
Chris@42 389 T7 = R0[WS(rs, 10)];
Chris@42 390 T8 = FNMS(KP382683432, T7, KP923879532 * T6);
Chris@42 391 T1A = FMA(KP382683432, T6, KP923879532 * T7);
Chris@42 392 T9 = R0[WS(rs, 6)];
Chris@42 393 Ta = R0[WS(rs, 14)];
Chris@42 394 Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
Chris@42 395 T1B = FMA(KP923879532, T9, KP382683432 * Ta);
Chris@42 396 }
Chris@42 397 Tc = T8 + Tb;
Chris@42 398 T2C = Tb - T8;
Chris@42 399 T1C = T1A - T1B;
Chris@42 400 T2n = T1A + T1B;
Chris@42 401 }
/* Input stage: R0 elements 1, 5, 9, 13. */
Chris@42 402 {
Chris@42 403 E Te, Tk, Th, Tj, Tf, Tg;
Chris@42 404 Te = R0[WS(rs, 1)];
Chris@42 405 Tk = R0[WS(rs, 9)];
Chris@42 406 Tf = R0[WS(rs, 5)];
Chris@42 407 Tg = R0[WS(rs, 13)];
Chris@42 408 Th = KP707106781 * (Tf - Tg);
Chris@42 409 Tj = KP707106781 * (Tf + Tg);
Chris@42 410 {
Chris@42 411 E Ti, Tl, T1H, T1I;
Chris@42 412 Ti = Te + Th;
Chris@42 413 Tl = Tj + Tk;
Chris@42 414 Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
Chris@42 415 T1k = FMA(KP195090322, Ti, KP980785280 * Tl);
Chris@42 416 T1H = Tk - Tj;
Chris@42 417 T1I = Te - Th;
Chris@42 418 T1J = FNMS(KP555570233, T1I, KP831469612 * T1H);
Chris@42 419 T26 = FMA(KP831469612, T1I, KP555570233 * T1H);
Chris@42 420 }
Chris@42 421 }
/* Input stage: R0 elements 3, 7, 11, 15. */
Chris@42 422 {
Chris@42 423 E Tq, Tt, Tp, Ts, Tn, To;
Chris@42 424 Tq = R0[WS(rs, 15)];
Chris@42 425 Tt = R0[WS(rs, 7)];
Chris@42 426 Tn = R0[WS(rs, 3)];
Chris@42 427 To = R0[WS(rs, 11)];
Chris@42 428 Tp = KP707106781 * (Tn - To);
Chris@42 429 Ts = KP707106781 * (Tn + To);
Chris@42 430 {
Chris@42 431 E Tr, Tu, T1E, T1F;
Chris@42 432 Tr = Tp - Tq;
Chris@42 433 Tu = Ts + Tt;
Chris@42 434 Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
Chris@42 435 T1l = FNMS(KP980785280, Tu, KP195090322 * Tr);
Chris@42 436 T1E = Tt - Ts;
Chris@42 437 T1F = Tp + Tq;
Chris@42 438 T1G = FNMS(KP555570233, T1F, KP831469612 * T1E);
Chris@42 439 T27 = FMA(KP831469612, T1F, KP555570233 * T1E);
Chris@42 440 }
Chris@42 441 }
/* Input stage: R1 elements 1, 3, 5, 7, 9, 11, 13, 15. */
Chris@42 442 {
Chris@42 443 E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
Chris@42 444 TW = R1[WS(rs, 15)];
Chris@42 445 T1a = R1[WS(rs, 7)];
Chris@42 446 TT = R1[WS(rs, 3)];
Chris@42 447 TU = R1[WS(rs, 11)];
Chris@42 448 TV = KP707106781 * (TT - TU);
Chris@42 449 T19 = KP707106781 * (TT + TU);
Chris@42 450 {
Chris@42 451 E TY, TZ, T11, T12;
Chris@42 452 TY = R1[WS(rs, 1)];
Chris@42 453 TZ = R1[WS(rs, 9)];
Chris@42 454 T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
Chris@42 455 T16 = FMA(KP382683432, TY, KP923879532 * TZ);
Chris@42 456 T11 = R1[WS(rs, 5)];
Chris@42 457 T12 = R1[WS(rs, 13)];
Chris@42 458 T13 = FNMS(KP923879532, T12, KP382683432 * T11);
Chris@42 459 T17 = FMA(KP923879532, T11, KP382683432 * T12);
Chris@42 460 }
Chris@42 461 {
Chris@42 462 E TX, T14, T1W, T1X;
Chris@42 463 TX = TV - TW;
Chris@42 464 T14 = T10 + T13;
Chris@42 465 T15 = TX + T14;
Chris@42 466 T1r = TX - T14;
Chris@42 467 T1W = T13 - T10;
Chris@42 468 T1X = T1a - T19;
Chris@42 469 T1Y = T1W - T1X;
Chris@42 470 T2e = T1W + T1X;
Chris@42 471 }
Chris@42 472 {
Chris@42 473 E T18, T1b, T1T, T1U;
Chris@42 474 T18 = T16 + T17;
Chris@42 475 T1b = T19 + T1a;
Chris@42 476 T1c = T18 + T1b;
Chris@42 477 T1s = T1b - T18;
Chris@42 478 T1T = TV + TW;
Chris@42 479 T1U = T16 - T17;
Chris@42 480 T1V = T1T + T1U;
Chris@42 481 T2d = T1U - T1T;
Chris@42 482 }
Chris@42 483 }
/* Input stage: R1 elements 0, 2, 4, 6, 8, 10, 12, 14. */
Chris@42 484 {
Chris@42 485 E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
Chris@42 486 Ty = R1[0];
Chris@42 487 TP = R1[WS(rs, 8)];
Chris@42 488 Tz = R1[WS(rs, 4)];
Chris@42 489 TA = R1[WS(rs, 12)];
Chris@42 490 TB = KP707106781 * (Tz - TA);
Chris@42 491 TO = KP707106781 * (Tz + TA);
Chris@42 492 {
Chris@42 493 E TD, TE, TG, TH;
Chris@42 494 TD = R1[WS(rs, 2)];
Chris@42 495 TE = R1[WS(rs, 10)];
Chris@42 496 TF = FNMS(KP382683432, TE, KP923879532 * TD);
Chris@42 497 TL = FMA(KP382683432, TD, KP923879532 * TE);
Chris@42 498 TG = R1[WS(rs, 6)];
Chris@42 499 TH = R1[WS(rs, 14)];
Chris@42 500 TI = FNMS(KP923879532, TH, KP382683432 * TG);
Chris@42 501 TM = FMA(KP923879532, TG, KP382683432 * TH);
Chris@42 502 }
Chris@42 503 {
Chris@42 504 E TC, TJ, T1P, T1Q;
Chris@42 505 TC = Ty + TB;
Chris@42 506 TJ = TF + TI;
Chris@42 507 TK = TC + TJ;
Chris@42 508 T1o = TC - TJ;
Chris@42 509 T1P = TI - TF;
Chris@42 510 T1Q = TP - TO;
Chris@42 511 T1R = T1P - T1Q;
Chris@42 512 T2b = T1P + T1Q;
Chris@42 513 }
Chris@42 514 {
Chris@42 515 E TN, TQ, T1M, T1N;
Chris@42 516 TN = TL + TM;
Chris@42 517 TQ = TO + TP;
Chris@42 518 TR = TN + TQ;
Chris@42 519 T1p = TQ - TN;
Chris@42 520 T1M = Ty - TB;
Chris@42 521 T1N = TL - TM;
Chris@42 522 T1O = T1M - T1N;
Chris@42 523 T2a = T1M + T1N;
Chris@42 524 }
Chris@42 525 }
/* Output stage: Cr and Ci at indices 0, 7, 8 and 15. */
Chris@42 526 {
Chris@42 527 E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t;
Chris@42 528 {
Chris@42 529 E Td, Tw, T2m, T2r;
Chris@42 530 Td = T5 + Tc;
Chris@42 531 Tw = Tm + Tv;
Chris@42 532 Tx = Td - Tw;
Chris@42 533 T1f = Td + Tw;
Chris@42 534 T2m = T1l - T1k;
Chris@42 535 T2r = T2n + T2q;
Chris@42 536 T2s = T2m - T2r;
Chris@42 537 T2u = T2m + T2r;
Chris@42 538 }
Chris@42 539 {
Chris@42 540 E TS, T1d, T1g, T1h;
Chris@42 541 TS = FMA(KP098017140, TK, KP995184726 * TR);
Chris@42 542 T1d = FNMS(KP995184726, T1c, KP098017140 * T15);
Chris@42 543 T1e = TS + T1d;
Chris@42 544 T2l = T1d - TS;
Chris@42 545 T1g = FNMS(KP098017140, TR, KP995184726 * TK);
Chris@42 546 T1h = FMA(KP995184726, T15, KP098017140 * T1c);
Chris@42 547 T1i = T1g + T1h;
Chris@42 548 T2t = T1h - T1g;
Chris@42 549 }
Chris@42 550 Cr[WS(csr, 8)] = Tx - T1e;
Chris@42 551 Ci[WS(csi, 8)] = T2t - T2u;
Chris@42 552 Cr[WS(csr, 7)] = Tx + T1e;
Chris@42 553 Ci[WS(csi, 7)] = T2t + T2u;
Chris@42 554 Cr[WS(csr, 15)] = T1f - T1i;
Chris@42 555 Ci[WS(csi, 15)] = T2l - T2s;
Chris@42 556 Cr[0] = T1f + T1i;
Chris@42 557 Ci[0] = T2l + T2s;
Chris@42 558 }
/* Output stage: Cr and Ci at indices 1, 6, 9 and 14. */
Chris@42 559 {
Chris@42 560 E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N;
Chris@42 561 {
Chris@42 562 E T25, T28, T2K, T2L;
Chris@42 563 T25 = T1z + T1C;
Chris@42 564 T28 = T26 - T27;
Chris@42 565 T29 = T25 + T28;
Chris@42 566 T2h = T25 - T28;
Chris@42 567 T2K = T1J + T1G;
Chris@42 568 T2L = T2C + T2D;
Chris@42 569 T2M = T2K - T2L;
Chris@42 570 T2O = T2K + T2L;
Chris@42 571 }
Chris@42 572 {
Chris@42 573 E T2c, T2f, T2i, T2j;
Chris@42 574 T2c = FMA(KP956940335, T2a, KP290284677 * T2b);
Chris@42 575 T2f = FNMS(KP290284677, T2e, KP956940335 * T2d);
Chris@42 576 T2g = T2c + T2f;
Chris@42 577 T2J = T2f - T2c;
Chris@42 578 T2i = FMA(KP290284677, T2d, KP956940335 * T2e);
Chris@42 579 T2j = FNMS(KP290284677, T2a, KP956940335 * T2b);
Chris@42 580 T2k = T2i - T2j;
Chris@42 581 T2N = T2j + T2i;
Chris@42 582 }
Chris@42 583 Cr[WS(csr, 14)] = T29 - T2g;
Chris@42 584 Ci[WS(csi, 14)] = T2N - T2O;
Chris@42 585 Cr[WS(csr, 1)] = T29 + T2g;
Chris@42 586 Ci[WS(csi, 1)] = T2N + T2O;
Chris@42 587 Cr[WS(csr, 9)] = T2h - T2k;
Chris@42 588 Ci[WS(csi, 9)] = T2J - T2M;
Chris@42 589 Cr[WS(csr, 6)] = T2h + T2k;
Chris@42 590 Ci[WS(csi, 6)] = T2J + T2M;
Chris@42 591 }
/* Output stage: Cr and Ci at indices 3, 4, 11 and 12. */
Chris@42 592 {
Chris@42 593 E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z;
Chris@42 594 {
Chris@42 595 E T1j, T1m, T2w, T2x;
Chris@42 596 T1j = T5 - Tc;
Chris@42 597 T1m = T1k + T1l;
Chris@42 598 T1n = T1j + T1m;
Chris@42 599 T1v = T1j - T1m;
Chris@42 600 T2w = Tv - Tm;
Chris@42 601 T2x = T2q - T2n;
Chris@42 602 T2y = T2w - T2x;
Chris@42 603 T2A = T2w + T2x;
Chris@42 604 }
Chris@42 605 {
Chris@42 606 E T1q, T1t, T1w, T1x;
Chris@42 607 T1q = FMA(KP773010453, T1o, KP634393284 * T1p);
Chris@42 608 T1t = FNMS(KP634393284, T1s, KP773010453 * T1r);
Chris@42 609 T1u = T1q + T1t;
Chris@42 610 T2v = T1t - T1q;
Chris@42 611 T1w = FMA(KP634393284, T1r, KP773010453 * T1s);
Chris@42 612 T1x = FNMS(KP634393284, T1o, KP773010453 * T1p);
Chris@42 613 T1y = T1w - T1x;
Chris@42 614 T2z = T1x + T1w;
Chris@42 615 }
Chris@42 616 Cr[WS(csr, 12)] = T1n - T1u;
Chris@42 617 Ci[WS(csi, 12)] = T2z - T2A;
Chris@42 618 Cr[WS(csr, 3)] = T1n + T1u;
Chris@42 619 Ci[WS(csi, 3)] = T2z + T2A;
Chris@42 620 Cr[WS(csr, 11)] = T1v - T1y;
Chris@42 621 Ci[WS(csi, 11)] = T2v - T2y;
Chris@42 622 Cr[WS(csr, 4)] = T1v + T1y;
Chris@42 623 Ci[WS(csi, 4)] = T2v + T2y;
Chris@42 624 }
/* Output stage: Cr and Ci at indices 2, 5, 10 and 13. */
Chris@42 625 {
Chris@42 626 E T1L, T21, T2G, T2I, T20, T2H, T24, T2B;
Chris@42 627 {
Chris@42 628 E T1D, T1K, T2E, T2F;
Chris@42 629 T1D = T1z - T1C;
Chris@42 630 T1K = T1G - T1J;
Chris@42 631 T1L = T1D + T1K;
Chris@42 632 T21 = T1D - T1K;
Chris@42 633 T2E = T2C - T2D;
Chris@42 634 T2F = T26 + T27;
Chris@42 635 T2G = T2E - T2F;
Chris@42 636 T2I = T2F + T2E;
Chris@42 637 }
Chris@42 638 {
Chris@42 639 E T1S, T1Z, T22, T23;
Chris@42 640 T1S = FMA(KP881921264, T1O, KP471396736 * T1R);
Chris@42 641 T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y);
Chris@42 642 T20 = T1S - T1Z;
Chris@42 643 T2H = T1S + T1Z;
Chris@42 644 T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y);
Chris@42 645 T23 = FNMS(KP471396736, T1O, KP881921264 * T1R);
Chris@42 646 T24 = T22 - T23;
Chris@42 647 T2B = T23 + T22;
Chris@42 648 }
Chris@42 649 Cr[WS(csr, 13)] = T1L - T20;
Chris@42 650 Ci[WS(csi, 13)] = T2B - T2G;
Chris@42 651 Cr[WS(csr, 2)] = T1L + T20;
Chris@42 652 Ci[WS(csi, 2)] = T2B + T2G;
Chris@42 653 Cr[WS(csr, 10)] = T21 - T24;
Chris@42 654 Ci[WS(csi, 10)] = T2I - T2H;
Chris@42 655 Cr[WS(csr, 5)] = T21 + T24;
Chris@42 656 Ci[WS(csi, 5)] = -(T2H + T2I);
Chris@42 657 }
Chris@42 658 }
Chris@42 659 }
Chris@42 660 }
Chris@42 661
/*
 * Descriptor passed to the planner for the non-FMA kernel: 32, the kernel
 * name, and an operation-count tuple.  The first three counts (138, 46, 36)
 * equal the additions / multiplications / fused multiply-add figures quoted
 * in the comment preceding the function.
 * NOTE(review): the meaning of the 32 (presumably the transform size), of the
 * fourth count and of GENUS comes from the project headers -- confirm there.
 */
Chris@42 662 static const kr2c_desc desc = { 32, "r2cfII_32", {138, 46, 36, 0}, &GENUS };
Chris@42 663
/*
 * Registration entry point (non-FMA build): passes the r2cfII_32 kernel and
 * its descriptor to X(kr2c_register) together with the planner p.
 */
Chris@42 664 void X(codelet_r2cfII_32) (planner *p) {
Chris@42 665 X(kr2c_register) (p, r2cfII_32, &desc);
Chris@42 666 }
Chris@42 667
Chris@42 668 #endif /* HAVE_FMA */