annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cfII_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:44 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 174 FP additions, 128 FP multiplications,
Chris@82 32 * (or, 46 additions, 0 multiplications, 128 fused multiply/add),
Chris@82 33 * 62 stack variables, 15 constants, and 64 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cfII.h"
Chris@82 36
/*
 * r2cfII_32, FMA build: this definition is selected when ARCH_PREFERS_FMA
 * or ISA_EXTENSION_PREFERS_FMA is defined.  Per the generator command line
 * recorded in this file (-n 32 -dft-II), it is the size-32 r2cf "type II"
 * codelet.
 *
 * R0, R1       - input arrays; each pass reads R0[WS(rs, k)] and
 *                R1[WS(rs, k)] for k = 0..15.
 * Cr, Ci       - output arrays; each pass writes Cr[WS(csr, k)] and
 *                Ci[WS(csi, k)] for k = 0..15.
 * rs, csr, csi - strides handed to WS() for input, Cr and Ci indexing.
 * v            - number of passes; the loop body runs v times (not at all
 *                when v <= 0).
 * ivs, ovs     - added to R0/R1 and to Cr/Ci respectively after each pass.
 *
 * R, E, INT, stride, DK, WS, FMA, FMS, FNMS and MAKE_VOLATILE_STRIDE are
 * defined outside this file.  NOTE(review): in FFTW, FMA(a, b, c) = a*b + c,
 * FMS(a, b, c) = a*b - c and FNMS(a, b, c) = c - a*b -- confirm against the
 * included headers.
 */
Chris@82 37 static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* The 15 numeric constants used by the arithmetic below, declared via DK. */
Chris@82 39 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@82 40 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@82 41 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@82 42 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@82 43 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@82 44 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@82 45 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 46 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@82 47 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@82 48 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 49 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 50 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 51 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 52 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 53 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 54 {
Chris@82 55 INT i;
/* One pass per count in v; the pointers advance in the loop increment. */
Chris@82 56 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* Intermediates shared between the input blocks and the output blocks. */
Chris@82 57 E T5, T2B, T1z, T2n, Tc, T2C, T1C, T2o, Tm, T1l, T1J, T27, Tv, T1k, T1G;
Chris@82 58 E T26, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
Chris@82 59 E T1O, T2a;
/* R0 inputs 0, 8, 4, 12 -> T5, T2B, T1z, T2n. */
Chris@82 60 {
Chris@82 61 E T1, T2l, T4, T2m, T2, T3;
Chris@82 62 T1 = R0[0];
Chris@82 63 T2l = R0[WS(rs, 8)];
Chris@82 64 T2 = R0[WS(rs, 4)];
Chris@82 65 T3 = R0[WS(rs, 12)];
Chris@82 66 T4 = T2 - T3;
Chris@82 67 T2m = T2 + T3;
Chris@82 68 T5 = FNMS(KP707106781, T4, T1);
Chris@82 69 T2B = FNMS(KP707106781, T2m, T2l);
Chris@82 70 T1z = FMA(KP707106781, T4, T1);
Chris@82 71 T2n = FMA(KP707106781, T2m, T2l);
Chris@82 72 }
/* R0 inputs 10, 2, 6, 14 -> Tc, T2C, T1C, T2o. */
Chris@82 73 {
Chris@82 74 E T8, T1A, Tb, T1B;
Chris@82 75 {
Chris@82 76 E T6, T7, T9, Ta;
Chris@82 77 T6 = R0[WS(rs, 10)];
Chris@82 78 T7 = R0[WS(rs, 2)];
Chris@82 79 T8 = FMA(KP414213562, T7, T6);
Chris@82 80 T1A = FNMS(KP414213562, T6, T7);
Chris@82 81 T9 = R0[WS(rs, 6)];
Chris@82 82 Ta = R0[WS(rs, 14)];
Chris@82 83 Tb = FMA(KP414213562, Ta, T9);
Chris@82 84 T1B = FMS(KP414213562, T9, Ta);
Chris@82 85 }
Chris@82 86 Tc = T8 - Tb;
Chris@82 87 T2C = T1B - T1A;
Chris@82 88 T1C = T1A + T1B;
Chris@82 89 T2o = T8 + Tb;
Chris@82 90 }
/* R0 inputs 7, 15, 3, 11 -> Tm, T1l, T1J, T27. */
Chris@82 91 {
Chris@82 92 E Te, Tj, Th, Tk, Tf, Tg;
Chris@82 93 Te = R0[WS(rs, 7)];
Chris@82 94 Tj = R0[WS(rs, 15)];
Chris@82 95 Tf = R0[WS(rs, 3)];
Chris@82 96 Tg = R0[WS(rs, 11)];
Chris@82 97 Th = Tf + Tg;
Chris@82 98 Tk = Tg - Tf;
Chris@82 99 {
Chris@82 100 E Ti, Tl, T1H, T1I;
Chris@82 101 Ti = FNMS(KP707106781, Th, Te);
Chris@82 102 Tl = FNMS(KP707106781, Tk, Tj);
Chris@82 103 Tm = FNMS(KP668178637, Tl, Ti);
Chris@82 104 T1l = FMA(KP668178637, Ti, Tl);
Chris@82 105 T1H = FMA(KP707106781, Th, Te);
Chris@82 106 T1I = FMA(KP707106781, Tk, Tj);
Chris@82 107 T1J = FMA(KP198912367, T1I, T1H);
Chris@82 108 T27 = FNMS(KP198912367, T1H, T1I);
Chris@82 109 }
Chris@82 110 }
/* R0 inputs 9, 1, 5, 13 -> Tv, T1k, T1G, T26. */
Chris@82 111 {
Chris@82 112 E Tn, Ts, Tq, Tt, To, Tp;
Chris@82 113 Tn = R0[WS(rs, 9)];
Chris@82 114 Ts = R0[WS(rs, 1)];
Chris@82 115 To = R0[WS(rs, 5)];
Chris@82 116 Tp = R0[WS(rs, 13)];
Chris@82 117 Tq = To + Tp;
Chris@82 118 Tt = To - Tp;
Chris@82 119 {
Chris@82 120 E Tr, Tu, T1E, T1F;
Chris@82 121 Tr = FNMS(KP707106781, Tq, Tn);
Chris@82 122 Tu = FNMS(KP707106781, Tt, Ts);
Chris@82 123 Tv = FNMS(KP668178637, Tu, Tr);
Chris@82 124 T1k = FMA(KP668178637, Tr, Tu);
Chris@82 125 T1E = FMA(KP707106781, Tq, Tn);
Chris@82 126 T1F = FMA(KP707106781, Tt, Ts);
Chris@82 127 T1G = FMA(KP198912367, T1F, T1E);
Chris@82 128 T26 = FNMS(KP198912367, T1E, T1F);
Chris@82 129 }
Chris@82 130 }
/* R1 inputs 15, 7, 3, 11, 9, 1, 5, 13 -> T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d. */
Chris@82 131 {
Chris@82 132 E TT, T16, TW, T17, T10, T1a, T13, T19, TU, TV;
Chris@82 133 TT = R1[WS(rs, 15)];
Chris@82 134 T16 = R1[WS(rs, 7)];
Chris@82 135 TU = R1[WS(rs, 3)];
Chris@82 136 TV = R1[WS(rs, 11)];
Chris@82 137 TW = TU - TV;
Chris@82 138 T17 = TU + TV;
Chris@82 139 {
Chris@82 140 E TY, TZ, T11, T12;
Chris@82 141 TY = R1[WS(rs, 9)];
Chris@82 142 TZ = R1[WS(rs, 1)];
Chris@82 143 T10 = FMA(KP414213562, TZ, TY);
Chris@82 144 T1a = FNMS(KP414213562, TY, TZ);
Chris@82 145 T11 = R1[WS(rs, 5)];
Chris@82 146 T12 = R1[WS(rs, 13)];
Chris@82 147 T13 = FMA(KP414213562, T12, T11);
Chris@82 148 T19 = FMS(KP414213562, T11, T12);
Chris@82 149 }
Chris@82 150 {
Chris@82 151 E TX, T14, T1W, T1X;
Chris@82 152 TX = FMA(KP707106781, TW, TT);
Chris@82 153 T14 = T10 - T13;
Chris@82 154 T15 = FMA(KP923879532, T14, TX);
Chris@82 155 T1r = FNMS(KP923879532, T14, TX);
Chris@82 156 T1W = FMA(KP707106781, T17, T16);
Chris@82 157 T1X = T10 + T13;
Chris@82 158 T1Y = FNMS(KP923879532, T1X, T1W);
Chris@82 159 T2e = FMA(KP923879532, T1X, T1W);
Chris@82 160 }
Chris@82 161 {
Chris@82 162 E T18, T1b, T1T, T1U;
Chris@82 163 T18 = FNMS(KP707106781, T17, T16);
Chris@82 164 T1b = T19 - T1a;
Chris@82 165 T1c = FNMS(KP923879532, T1b, T18);
Chris@82 166 T1s = FMA(KP923879532, T1b, T18);
Chris@82 167 T1T = FMS(KP707106781, TW, TT);
Chris@82 168 T1U = T1a + T19;
Chris@82 169 T1V = FNMS(KP923879532, T1U, T1T);
Chris@82 170 T2d = FMA(KP923879532, T1U, T1T);
Chris@82 171 }
Chris@82 172 }
/* R1 inputs 0, 8, 4, 12, 10, 2, 6, 14 -> TK, T1o, T1R, T2b, TR, T1p, T1O, T2a. */
Chris@82 173 {
Chris@82 174 E Ty, TL, TB, TM, TF, TP, TI, TO, Tz, TA;
Chris@82 175 Ty = R1[0];
Chris@82 176 TL = R1[WS(rs, 8)];
Chris@82 177 Tz = R1[WS(rs, 4)];
Chris@82 178 TA = R1[WS(rs, 12)];
Chris@82 179 TB = Tz - TA;
Chris@82 180 TM = Tz + TA;
Chris@82 181 {
Chris@82 182 E TD, TE, TG, TH;
Chris@82 183 TD = R1[WS(rs, 10)];
Chris@82 184 TE = R1[WS(rs, 2)];
Chris@82 185 TF = FMA(KP414213562, TE, TD);
Chris@82 186 TP = FNMS(KP414213562, TD, TE);
Chris@82 187 TG = R1[WS(rs, 6)];
Chris@82 188 TH = R1[WS(rs, 14)];
Chris@82 189 TI = FMA(KP414213562, TH, TG);
Chris@82 190 TO = FMS(KP414213562, TG, TH);
Chris@82 191 }
Chris@82 192 {
Chris@82 193 E TC, TJ, T1P, T1Q;
Chris@82 194 TC = FNMS(KP707106781, TB, Ty);
Chris@82 195 TJ = TF - TI;
Chris@82 196 TK = FNMS(KP923879532, TJ, TC);
Chris@82 197 T1o = FMA(KP923879532, TJ, TC);
Chris@82 198 T1P = FMA(KP707106781, TM, TL);
Chris@82 199 T1Q = TF + TI;
Chris@82 200 T1R = FNMS(KP923879532, T1Q, T1P);
Chris@82 201 T2b = FMA(KP923879532, T1Q, T1P);
Chris@82 202 }
Chris@82 203 {
Chris@82 204 E TN, TQ, T1M, T1N;
Chris@82 205 TN = FNMS(KP707106781, TM, TL);
Chris@82 206 TQ = TO - TP;
Chris@82 207 TR = FNMS(KP923879532, TQ, TN);
Chris@82 208 T1p = FMA(KP923879532, TQ, TN);
Chris@82 209 T1M = FMA(KP707106781, TB, Ty);
Chris@82 210 T1N = TP + TO;
Chris@82 211 T1O = FNMS(KP923879532, T1N, T1M);
Chris@82 212 T2a = FMA(KP923879532, T1N, T1M);
Chris@82 213 }
Chris@82 214 }
/* Outputs 13, 2, 10, 5 of Cr and Ci. */
Chris@82 215 {
Chris@82 216 E Tx, T1f, T2L, T2N, T1e, T2O, T1i, T2M;
Chris@82 217 {
Chris@82 218 E Td, Tw, T2J, T2K;
Chris@82 219 Td = FNMS(KP923879532, Tc, T5);
Chris@82 220 Tw = Tm - Tv;
Chris@82 221 Tx = FMA(KP831469612, Tw, Td);
Chris@82 222 T1f = FNMS(KP831469612, Tw, Td);
Chris@82 223 T2J = FNMS(KP923879532, T2C, T2B);
Chris@82 224 T2K = T1k + T1l;
Chris@82 225 T2L = FMA(KP831469612, T2K, T2J);
Chris@82 226 T2N = FNMS(KP831469612, T2K, T2J);
Chris@82 227 }
Chris@82 228 {
Chris@82 229 E TS, T1d, T1g, T1h;
Chris@82 230 TS = FNMS(KP534511135, TR, TK);
Chris@82 231 T1d = FNMS(KP534511135, T1c, T15);
Chris@82 232 T1e = TS - T1d;
Chris@82 233 T2O = TS + T1d;
Chris@82 234 T1g = FMA(KP534511135, TK, TR);
Chris@82 235 T1h = FMA(KP534511135, T15, T1c);
Chris@82 236 T1i = T1g - T1h;
Chris@82 237 T2M = T1g + T1h;
Chris@82 238 }
Chris@82 239 Cr[WS(csr, 13)] = FNMS(KP881921264, T1e, Tx);
Chris@82 240 Ci[WS(csi, 13)] = FNMS(KP881921264, T2M, T2L);
Chris@82 241 Cr[WS(csr, 2)] = FMA(KP881921264, T1e, Tx);
Chris@82 242 Ci[WS(csi, 2)] = -(FMA(KP881921264, T2M, T2L));
Chris@82 243 Cr[WS(csr, 10)] = FNMS(KP881921264, T1i, T1f);
Chris@82 244 Ci[WS(csi, 10)] = -(FMA(KP881921264, T2O, T2N));
Chris@82 245 Cr[WS(csr, 5)] = FMA(KP881921264, T1i, T1f);
Chris@82 246 Ci[WS(csi, 5)] = FNMS(KP881921264, T2O, T2N);
Chris@82 247 }
/* Outputs 15, 0, 8, 7 of Cr and Ci. */
Chris@82 248 {
Chris@82 249 E T29, T2h, T2r, T2t, T2g, T2u, T2k, T2s;
Chris@82 250 {
Chris@82 251 E T25, T28, T2p, T2q;
Chris@82 252 T25 = FMA(KP923879532, T1C, T1z);
Chris@82 253 T28 = T26 - T27;
Chris@82 254 T29 = FMA(KP980785280, T28, T25);
Chris@82 255 T2h = FNMS(KP980785280, T28, T25);
Chris@82 256 T2p = FMA(KP923879532, T2o, T2n);
Chris@82 257 T2q = T1G + T1J;
Chris@82 258 T2r = FMA(KP980785280, T2q, T2p);
Chris@82 259 T2t = FNMS(KP980785280, T2q, T2p);
Chris@82 260 }
Chris@82 261 {
Chris@82 262 E T2c, T2f, T2i, T2j;
Chris@82 263 T2c = FNMS(KP098491403, T2b, T2a);
Chris@82 264 T2f = FMA(KP098491403, T2e, T2d);
Chris@82 265 T2g = T2c + T2f;
Chris@82 266 T2u = T2f - T2c;
Chris@82 267 T2i = FMA(KP098491403, T2a, T2b);
Chris@82 268 T2j = FNMS(KP098491403, T2d, T2e);
Chris@82 269 T2k = T2i - T2j;
Chris@82 270 T2s = T2i + T2j;
Chris@82 271 }
Chris@82 272 Cr[WS(csr, 15)] = FNMS(KP995184726, T2g, T29);
Chris@82 273 Ci[WS(csi, 15)] = FNMS(KP995184726, T2s, T2r);
Chris@82 274 Cr[0] = FMA(KP995184726, T2g, T29);
Chris@82 275 Ci[0] = -(FMA(KP995184726, T2s, T2r));
Chris@82 276 Cr[WS(csr, 8)] = FNMS(KP995184726, T2k, T2h);
Chris@82 277 Ci[WS(csi, 8)] = FMS(KP995184726, T2u, T2t);
Chris@82 278 Cr[WS(csr, 7)] = FMA(KP995184726, T2k, T2h);
Chris@82 279 Ci[WS(csi, 7)] = FMA(KP995184726, T2u, T2t);
Chris@82 280 }
/* Outputs 14, 1, 9, 6 of Cr and Ci. */
Chris@82 281 {
Chris@82 282 E T1n, T1v, T2F, T2H, T1u, T2I, T1y, T2G;
Chris@82 283 {
Chris@82 284 E T1j, T1m, T2D, T2E;
Chris@82 285 T1j = FMA(KP923879532, Tc, T5);
Chris@82 286 T1m = T1k - T1l;
Chris@82 287 T1n = FMA(KP831469612, T1m, T1j);
Chris@82 288 T1v = FNMS(KP831469612, T1m, T1j);
Chris@82 289 T2D = FMA(KP923879532, T2C, T2B);
Chris@82 290 T2E = Tv + Tm;
Chris@82 291 T2F = FMA(KP831469612, T2E, T2D);
Chris@82 292 T2H = FNMS(KP831469612, T2E, T2D);
Chris@82 293 }
Chris@82 294 {
Chris@82 295 E T1q, T1t, T1w, T1x;
Chris@82 296 T1q = FMA(KP303346683, T1p, T1o);
Chris@82 297 T1t = FMA(KP303346683, T1s, T1r);
Chris@82 298 T1u = T1q - T1t;
Chris@82 299 T2I = T1q + T1t;
Chris@82 300 T1w = FNMS(KP303346683, T1r, T1s);
Chris@82 301 T1x = FNMS(KP303346683, T1o, T1p);
Chris@82 302 T1y = T1w - T1x;
Chris@82 303 T2G = T1x + T1w;
Chris@82 304 }
Chris@82 305 Cr[WS(csr, 14)] = FNMS(KP956940335, T1u, T1n);
Chris@82 306 Ci[WS(csi, 14)] = FMS(KP956940335, T2G, T2F);
Chris@82 307 Cr[WS(csr, 1)] = FMA(KP956940335, T1u, T1n);
Chris@82 308 Ci[WS(csi, 1)] = FMA(KP956940335, T2G, T2F);
Chris@82 309 Cr[WS(csr, 9)] = FNMS(KP956940335, T1y, T1v);
Chris@82 310 Ci[WS(csi, 9)] = FNMS(KP956940335, T2I, T2H);
Chris@82 311 Cr[WS(csr, 6)] = FMA(KP956940335, T1y, T1v);
Chris@82 312 Ci[WS(csi, 6)] = -(FMA(KP956940335, T2I, T2H));
Chris@82 313 }
/* Outputs 12, 3, 11, 4 of Cr and Ci. */
Chris@82 314 {
Chris@82 315 E T1L, T21, T2x, T2z, T20, T2A, T24, T2y;
Chris@82 316 {
Chris@82 317 E T1D, T1K, T2v, T2w;
Chris@82 318 T1D = FNMS(KP923879532, T1C, T1z);
Chris@82 319 T1K = T1G - T1J;
Chris@82 320 T1L = FMA(KP980785280, T1K, T1D);
Chris@82 321 T21 = FNMS(KP980785280, T1K, T1D);
Chris@82 322 T2v = FNMS(KP923879532, T2o, T2n);
Chris@82 323 T2w = T26 + T27;
Chris@82 324 T2x = FNMS(KP980785280, T2w, T2v);
Chris@82 325 T2z = FMA(KP980785280, T2w, T2v);
Chris@82 326 }
Chris@82 327 {
Chris@82 328 E T1S, T1Z, T22, T23;
Chris@82 329 T1S = FMA(KP820678790, T1R, T1O);
Chris@82 330 T1Z = FNMS(KP820678790, T1Y, T1V);
Chris@82 331 T20 = T1S + T1Z;
Chris@82 332 T2A = T1Z - T1S;
Chris@82 333 T22 = FMA(KP820678790, T1V, T1Y);
Chris@82 334 T23 = FNMS(KP820678790, T1O, T1R);
Chris@82 335 T24 = T22 - T23;
Chris@82 336 T2y = T23 + T22;
Chris@82 337 }
Chris@82 338 Cr[WS(csr, 12)] = FNMS(KP773010453, T20, T1L);
Chris@82 339 Ci[WS(csi, 12)] = FMS(KP773010453, T2y, T2x);
Chris@82 340 Cr[WS(csr, 3)] = FMA(KP773010453, T20, T1L);
Chris@82 341 Ci[WS(csi, 3)] = FMA(KP773010453, T2y, T2x);
Chris@82 342 Cr[WS(csr, 11)] = FNMS(KP773010453, T24, T21);
Chris@82 343 Ci[WS(csi, 11)] = FMA(KP773010453, T2A, T2z);
Chris@82 344 Cr[WS(csr, 4)] = FMA(KP773010453, T24, T21);
Chris@82 345 Ci[WS(csi, 4)] = FMS(KP773010453, T2A, T2z);
Chris@82 346 }
Chris@82 347 }
Chris@82 348 }
Chris@82 349 }
Chris@82 350
/*
 * Descriptor passed to X(kr2c_register) together with r2cfII_32 (FMA build).
 * It holds 32, the name string "r2cfII_32", a four-entry initializer and the
 * address of GENUS (defined outside this file).  The first three entries
 * (46, 0, 128) equal the addition, multiplication and fused multiply/add
 * counts quoted in this build's generator comment.  NOTE(review): the field
 * meanings come from the kr2c_desc declaration, which is not visible here --
 * confirm there.
 */
Chris@82 351 static const kr2c_desc desc = { 32, "r2cfII_32", {46, 0, 128, 0}, &GENUS };
Chris@82 352
/*
 * Registration entry point (FMA build): forwards planner p, the r2cfII_32
 * kernel and its descriptor to X(kr2c_register).  X() is a macro defined
 * outside this file; presumably it applies the library's symbol prefix --
 * confirm against the FFTW headers.
 */
Chris@82 353 void X(codelet_r2cfII_32) (planner *p) {
Chris@82 354 X(kr2c_register) (p, r2cfII_32, &desc);
Chris@82 355 }
Chris@82 356
Chris@82 357 #else
Chris@82 358
Chris@82 359 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 360
Chris@82 361 /*
Chris@82 362 * This function contains 174 FP additions, 82 FP multiplications,
Chris@82 363 * (or, 138 additions, 46 multiplications, 36 fused multiply/add),
Chris@82 364 * 62 stack variables, 15 constants, and 64 memory accesses
Chris@82 365 */
Chris@82 366 #include "rdft/scalar/r2cfII.h"
Chris@82 367
/*
 * r2cfII_32, non-FMA build: this definition is selected when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.  Per the
 * generator command line recorded in this file (-n 32 -dft-II, without
 * -fma), it is the size-32 r2cf "type II" codelet.
 *
 * R0, R1       - input arrays; each pass reads R0[WS(rs, k)] and
 *                R1[WS(rs, k)] for k = 0..15.
 * Cr, Ci       - output arrays; each pass writes Cr[WS(csr, k)] and
 *                Ci[WS(csi, k)] for k = 0..15.
 * rs, csr, csi - strides handed to WS() for input, Cr and Ci indexing.
 * v            - number of passes; the loop body runs v times (not at all
 *                when v <= 0).
 * ivs, ovs     - added to R0/R1 and to Cr/Ci respectively after each pass.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * outside this file.  NOTE(review): in FFTW, FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b -- confirm against the included headers.
 */
Chris@82 368 static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 369 {
/* The 15 numeric constants used by the arithmetic below, declared via DK. */
Chris@82 370 DK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@82 371 DK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@82 372 DK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@82 373 DK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@82 374 DK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@82 375 DK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@82 376 DK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@82 377 DK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@82 378 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 379 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 380 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 381 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 382 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 383 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 384 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 385 {
Chris@82 386 INT i;
/* One pass per count in v; the pointers advance in the loop increment. */
Chris@82 387 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* Intermediates shared between the input blocks and the output blocks. */
Chris@82 388 E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G;
Chris@82 389 E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
Chris@82 390 E T1O, T2a;
/* R0 inputs 0, 8, 4, 12 -> T5, T2D, T1z, T2q. */
Chris@82 391 {
Chris@82 392 E T1, T2p, T4, T2o, T2, T3;
Chris@82 393 T1 = R0[0];
Chris@82 394 T2p = R0[WS(rs, 8)];
Chris@82 395 T2 = R0[WS(rs, 4)];
Chris@82 396 T3 = R0[WS(rs, 12)];
Chris@82 397 T4 = KP707106781 * (T2 - T3);
Chris@82 398 T2o = KP707106781 * (T2 + T3);
Chris@82 399 T5 = T1 + T4;
Chris@82 400 T2D = T2p - T2o;
Chris@82 401 T1z = T1 - T4;
Chris@82 402 T2q = T2o + T2p;
Chris@82 403 }
/* R0 inputs 2, 10, 6, 14 -> Tc, T2C, T1C, T2n. */
Chris@82 404 {
Chris@82 405 E T8, T1A, Tb, T1B;
Chris@82 406 {
Chris@82 407 E T6, T7, T9, Ta;
Chris@82 408 T6 = R0[WS(rs, 2)];
Chris@82 409 T7 = R0[WS(rs, 10)];
Chris@82 410 T8 = FNMS(KP382683432, T7, KP923879532 * T6);
Chris@82 411 T1A = FMA(KP382683432, T6, KP923879532 * T7);
Chris@82 412 T9 = R0[WS(rs, 6)];
Chris@82 413 Ta = R0[WS(rs, 14)];
Chris@82 414 Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
Chris@82 415 T1B = FMA(KP923879532, T9, KP382683432 * Ta);
Chris@82 416 }
Chris@82 417 Tc = T8 + Tb;
Chris@82 418 T2C = Tb - T8;
Chris@82 419 T1C = T1A - T1B;
Chris@82 420 T2n = T1A + T1B;
Chris@82 421 }
/* R0 inputs 1, 9, 5, 13 -> Tm, T1k, T1J, T26. */
Chris@82 422 {
Chris@82 423 E Te, Tk, Th, Tj, Tf, Tg;
Chris@82 424 Te = R0[WS(rs, 1)];
Chris@82 425 Tk = R0[WS(rs, 9)];
Chris@82 426 Tf = R0[WS(rs, 5)];
Chris@82 427 Tg = R0[WS(rs, 13)];
Chris@82 428 Th = KP707106781 * (Tf - Tg);
Chris@82 429 Tj = KP707106781 * (Tf + Tg);
Chris@82 430 {
Chris@82 431 E Ti, Tl, T1H, T1I;
Chris@82 432 Ti = Te + Th;
Chris@82 433 Tl = Tj + Tk;
Chris@82 434 Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
Chris@82 435 T1k = FMA(KP195090322, Ti, KP980785280 * Tl);
Chris@82 436 T1H = Tk - Tj;
Chris@82 437 T1I = Te - Th;
Chris@82 438 T1J = FNMS(KP555570233, T1I, KP831469612 * T1H);
Chris@82 439 T26 = FMA(KP831469612, T1I, KP555570233 * T1H);
Chris@82 440 }
Chris@82 441 }
/* R0 inputs 15, 7, 3, 11 -> Tv, T1l, T1G, T27. */
Chris@82 442 {
Chris@82 443 E Tq, Tt, Tp, Ts, Tn, To;
Chris@82 444 Tq = R0[WS(rs, 15)];
Chris@82 445 Tt = R0[WS(rs, 7)];
Chris@82 446 Tn = R0[WS(rs, 3)];
Chris@82 447 To = R0[WS(rs, 11)];
Chris@82 448 Tp = KP707106781 * (Tn - To);
Chris@82 449 Ts = KP707106781 * (Tn + To);
Chris@82 450 {
Chris@82 451 E Tr, Tu, T1E, T1F;
Chris@82 452 Tr = Tp - Tq;
Chris@82 453 Tu = Ts + Tt;
Chris@82 454 Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
Chris@82 455 T1l = FNMS(KP980785280, Tu, KP195090322 * Tr);
Chris@82 456 T1E = Tt - Ts;
Chris@82 457 T1F = Tp + Tq;
Chris@82 458 T1G = FNMS(KP555570233, T1F, KP831469612 * T1E);
Chris@82 459 T27 = FMA(KP831469612, T1F, KP555570233 * T1E);
Chris@82 460 }
Chris@82 461 }
/* R1 inputs 15, 7, 3, 11, 1, 9, 5, 13 -> T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d. */
Chris@82 462 {
Chris@82 463 E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
Chris@82 464 TW = R1[WS(rs, 15)];
Chris@82 465 T1a = R1[WS(rs, 7)];
Chris@82 466 TT = R1[WS(rs, 3)];
Chris@82 467 TU = R1[WS(rs, 11)];
Chris@82 468 TV = KP707106781 * (TT - TU);
Chris@82 469 T19 = KP707106781 * (TT + TU);
Chris@82 470 {
Chris@82 471 E TY, TZ, T11, T12;
Chris@82 472 TY = R1[WS(rs, 1)];
Chris@82 473 TZ = R1[WS(rs, 9)];
Chris@82 474 T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
Chris@82 475 T16 = FMA(KP382683432, TY, KP923879532 * TZ);
Chris@82 476 T11 = R1[WS(rs, 5)];
Chris@82 477 T12 = R1[WS(rs, 13)];
Chris@82 478 T13 = FNMS(KP923879532, T12, KP382683432 * T11);
Chris@82 479 T17 = FMA(KP923879532, T11, KP382683432 * T12);
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E TX, T14, T1W, T1X;
Chris@82 483 TX = TV - TW;
Chris@82 484 T14 = T10 + T13;
Chris@82 485 T15 = TX + T14;
Chris@82 486 T1r = TX - T14;
Chris@82 487 T1W = T13 - T10;
Chris@82 488 T1X = T1a - T19;
Chris@82 489 T1Y = T1W - T1X;
Chris@82 490 T2e = T1W + T1X;
Chris@82 491 }
Chris@82 492 {
Chris@82 493 E T18, T1b, T1T, T1U;
Chris@82 494 T18 = T16 + T17;
Chris@82 495 T1b = T19 + T1a;
Chris@82 496 T1c = T18 + T1b;
Chris@82 497 T1s = T1b - T18;
Chris@82 498 T1T = TV + TW;
Chris@82 499 T1U = T16 - T17;
Chris@82 500 T1V = T1T + T1U;
Chris@82 501 T2d = T1U - T1T;
Chris@82 502 }
Chris@82 503 }
/* R1 inputs 0, 8, 4, 12, 2, 10, 6, 14 -> TK, T1o, T1R, T2b, TR, T1p, T1O, T2a. */
Chris@82 504 {
Chris@82 505 E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
Chris@82 506 Ty = R1[0];
Chris@82 507 TP = R1[WS(rs, 8)];
Chris@82 508 Tz = R1[WS(rs, 4)];
Chris@82 509 TA = R1[WS(rs, 12)];
Chris@82 510 TB = KP707106781 * (Tz - TA);
Chris@82 511 TO = KP707106781 * (Tz + TA);
Chris@82 512 {
Chris@82 513 E TD, TE, TG, TH;
Chris@82 514 TD = R1[WS(rs, 2)];
Chris@82 515 TE = R1[WS(rs, 10)];
Chris@82 516 TF = FNMS(KP382683432, TE, KP923879532 * TD);
Chris@82 517 TL = FMA(KP382683432, TD, KP923879532 * TE);
Chris@82 518 TG = R1[WS(rs, 6)];
Chris@82 519 TH = R1[WS(rs, 14)];
Chris@82 520 TI = FNMS(KP923879532, TH, KP382683432 * TG);
Chris@82 521 TM = FMA(KP923879532, TG, KP382683432 * TH);
Chris@82 522 }
Chris@82 523 {
Chris@82 524 E TC, TJ, T1P, T1Q;
Chris@82 525 TC = Ty + TB;
Chris@82 526 TJ = TF + TI;
Chris@82 527 TK = TC + TJ;
Chris@82 528 T1o = TC - TJ;
Chris@82 529 T1P = TI - TF;
Chris@82 530 T1Q = TP - TO;
Chris@82 531 T1R = T1P - T1Q;
Chris@82 532 T2b = T1P + T1Q;
Chris@82 533 }
Chris@82 534 {
Chris@82 535 E TN, TQ, T1M, T1N;
Chris@82 536 TN = TL + TM;
Chris@82 537 TQ = TO + TP;
Chris@82 538 TR = TN + TQ;
Chris@82 539 T1p = TQ - TN;
Chris@82 540 T1M = Ty - TB;
Chris@82 541 T1N = TL - TM;
Chris@82 542 T1O = T1M - T1N;
Chris@82 543 T2a = T1M + T1N;
Chris@82 544 }
Chris@82 545 }
/* Outputs 8, 7, 15, 0 of Cr and Ci. */
Chris@82 546 {
Chris@82 547 E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t;
Chris@82 548 {
Chris@82 549 E Td, Tw, T2m, T2r;
Chris@82 550 Td = T5 + Tc;
Chris@82 551 Tw = Tm + Tv;
Chris@82 552 Tx = Td - Tw;
Chris@82 553 T1f = Td + Tw;
Chris@82 554 T2m = T1l - T1k;
Chris@82 555 T2r = T2n + T2q;
Chris@82 556 T2s = T2m - T2r;
Chris@82 557 T2u = T2m + T2r;
Chris@82 558 }
Chris@82 559 {
Chris@82 560 E TS, T1d, T1g, T1h;
Chris@82 561 TS = FMA(KP098017140, TK, KP995184726 * TR);
Chris@82 562 T1d = FNMS(KP995184726, T1c, KP098017140 * T15);
Chris@82 563 T1e = TS + T1d;
Chris@82 564 T2l = T1d - TS;
Chris@82 565 T1g = FNMS(KP098017140, TR, KP995184726 * TK);
Chris@82 566 T1h = FMA(KP995184726, T15, KP098017140 * T1c);
Chris@82 567 T1i = T1g + T1h;
Chris@82 568 T2t = T1h - T1g;
Chris@82 569 }
Chris@82 570 Cr[WS(csr, 8)] = Tx - T1e;
Chris@82 571 Ci[WS(csi, 8)] = T2t - T2u;
Chris@82 572 Cr[WS(csr, 7)] = Tx + T1e;
Chris@82 573 Ci[WS(csi, 7)] = T2t + T2u;
Chris@82 574 Cr[WS(csr, 15)] = T1f - T1i;
Chris@82 575 Ci[WS(csi, 15)] = T2l - T2s;
Chris@82 576 Cr[0] = T1f + T1i;
Chris@82 577 Ci[0] = T2l + T2s;
Chris@82 578 }
/* Outputs 14, 1, 9, 6 of Cr and Ci. */
Chris@82 579 {
Chris@82 580 E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N;
Chris@82 581 {
Chris@82 582 E T25, T28, T2K, T2L;
Chris@82 583 T25 = T1z + T1C;
Chris@82 584 T28 = T26 - T27;
Chris@82 585 T29 = T25 + T28;
Chris@82 586 T2h = T25 - T28;
Chris@82 587 T2K = T1J + T1G;
Chris@82 588 T2L = T2C + T2D;
Chris@82 589 T2M = T2K - T2L;
Chris@82 590 T2O = T2K + T2L;
Chris@82 591 }
Chris@82 592 {
Chris@82 593 E T2c, T2f, T2i, T2j;
Chris@82 594 T2c = FMA(KP956940335, T2a, KP290284677 * T2b);
Chris@82 595 T2f = FNMS(KP290284677, T2e, KP956940335 * T2d);
Chris@82 596 T2g = T2c + T2f;
Chris@82 597 T2J = T2f - T2c;
Chris@82 598 T2i = FMA(KP290284677, T2d, KP956940335 * T2e);
Chris@82 599 T2j = FNMS(KP290284677, T2a, KP956940335 * T2b);
Chris@82 600 T2k = T2i - T2j;
Chris@82 601 T2N = T2j + T2i;
Chris@82 602 }
Chris@82 603 Cr[WS(csr, 14)] = T29 - T2g;
Chris@82 604 Ci[WS(csi, 14)] = T2N - T2O;
Chris@82 605 Cr[WS(csr, 1)] = T29 + T2g;
Chris@82 606 Ci[WS(csi, 1)] = T2N + T2O;
Chris@82 607 Cr[WS(csr, 9)] = T2h - T2k;
Chris@82 608 Ci[WS(csi, 9)] = T2J - T2M;
Chris@82 609 Cr[WS(csr, 6)] = T2h + T2k;
Chris@82 610 Ci[WS(csi, 6)] = T2J + T2M;
Chris@82 611 }
/* Outputs 12, 3, 11, 4 of Cr and Ci. */
Chris@82 612 {
Chris@82 613 E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z;
Chris@82 614 {
Chris@82 615 E T1j, T1m, T2w, T2x;
Chris@82 616 T1j = T5 - Tc;
Chris@82 617 T1m = T1k + T1l;
Chris@82 618 T1n = T1j + T1m;
Chris@82 619 T1v = T1j - T1m;
Chris@82 620 T2w = Tv - Tm;
Chris@82 621 T2x = T2q - T2n;
Chris@82 622 T2y = T2w - T2x;
Chris@82 623 T2A = T2w + T2x;
Chris@82 624 }
Chris@82 625 {
Chris@82 626 E T1q, T1t, T1w, T1x;
Chris@82 627 T1q = FMA(KP773010453, T1o, KP634393284 * T1p);
Chris@82 628 T1t = FNMS(KP634393284, T1s, KP773010453 * T1r);
Chris@82 629 T1u = T1q + T1t;
Chris@82 630 T2v = T1t - T1q;
Chris@82 631 T1w = FMA(KP634393284, T1r, KP773010453 * T1s);
Chris@82 632 T1x = FNMS(KP634393284, T1o, KP773010453 * T1p);
Chris@82 633 T1y = T1w - T1x;
Chris@82 634 T2z = T1x + T1w;
Chris@82 635 }
Chris@82 636 Cr[WS(csr, 12)] = T1n - T1u;
Chris@82 637 Ci[WS(csi, 12)] = T2z - T2A;
Chris@82 638 Cr[WS(csr, 3)] = T1n + T1u;
Chris@82 639 Ci[WS(csi, 3)] = T2z + T2A;
Chris@82 640 Cr[WS(csr, 11)] = T1v - T1y;
Chris@82 641 Ci[WS(csi, 11)] = T2v - T2y;
Chris@82 642 Cr[WS(csr, 4)] = T1v + T1y;
Chris@82 643 Ci[WS(csi, 4)] = T2v + T2y;
Chris@82 644 }
/* Outputs 13, 2, 10, 5 of Cr and Ci. */
Chris@82 645 {
Chris@82 646 E T1L, T21, T2G, T2I, T20, T2H, T24, T2B;
Chris@82 647 {
Chris@82 648 E T1D, T1K, T2E, T2F;
Chris@82 649 T1D = T1z - T1C;
Chris@82 650 T1K = T1G - T1J;
Chris@82 651 T1L = T1D + T1K;
Chris@82 652 T21 = T1D - T1K;
Chris@82 653 T2E = T2C - T2D;
Chris@82 654 T2F = T26 + T27;
Chris@82 655 T2G = T2E - T2F;
Chris@82 656 T2I = T2F + T2E;
Chris@82 657 }
Chris@82 658 {
Chris@82 659 E T1S, T1Z, T22, T23;
Chris@82 660 T1S = FMA(KP881921264, T1O, KP471396736 * T1R);
Chris@82 661 T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y);
Chris@82 662 T20 = T1S - T1Z;
Chris@82 663 T2H = T1S + T1Z;
Chris@82 664 T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y);
Chris@82 665 T23 = FNMS(KP471396736, T1O, KP881921264 * T1R);
Chris@82 666 T24 = T22 - T23;
Chris@82 667 T2B = T23 + T22;
Chris@82 668 }
Chris@82 669 Cr[WS(csr, 13)] = T1L - T20;
Chris@82 670 Ci[WS(csi, 13)] = T2B - T2G;
Chris@82 671 Cr[WS(csr, 2)] = T1L + T20;
Chris@82 672 Ci[WS(csi, 2)] = T2B + T2G;
Chris@82 673 Cr[WS(csr, 10)] = T21 - T24;
Chris@82 674 Ci[WS(csi, 10)] = T2I - T2H;
Chris@82 675 Cr[WS(csr, 5)] = T21 + T24;
Chris@82 676 Ci[WS(csi, 5)] = -(T2H + T2I);
Chris@82 677 }
Chris@82 678 }
Chris@82 679 }
Chris@82 680 }
Chris@82 681
/*
 * Descriptor passed to X(kr2c_register) together with r2cfII_32 (non-FMA
 * build).  It holds 32, the name string "r2cfII_32", a four-entry initializer
 * and the address of GENUS (defined outside this file).  The first three
 * entries (138, 46, 36) equal the addition, multiplication and fused
 * multiply/add counts quoted in this build's generator comment.
 * NOTE(review): the field meanings come from the kr2c_desc declaration, which
 * is not visible here -- confirm there.
 */
Chris@82 682 static const kr2c_desc desc = { 32, "r2cfII_32", {138, 46, 36, 0}, &GENUS };
Chris@82 683
/*
 * Registration entry point (non-FMA build): forwards planner p, the
 * r2cfII_32 kernel and its descriptor to X(kr2c_register).  X() is a macro
 * defined outside this file; presumably it applies the library's symbol
 * prefix -- confirm against the FFTW headers.
 */
Chris@82 684 void X(codelet_r2cfII_32) (planner *p) {
Chris@82 685 X(kr2c_register) (p, r2cfII_32, &desc);
Chris@82 686 }
Chris@82 687
Chris@82 688 #endif