annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cbIII_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:45 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cbIII_32 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 174 FP additions, 100 FP multiplications,
Chris@82 32 * (or, 106 additions, 32 multiplications, 68 fused multiply/add),
Chris@82 33 * 65 stack variables, 18 constants, and 64 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cbIII.h"
Chris@82 36
/*
 * r2cbIII_32 -- FMA-oriented variant (this branch is compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated straight-line kernel (see the "Generated by" line
 * above: -n 32, -sign 1, -dft-III).  Do not hand-edit the arithmetic.
 *
 * Per loop iteration the code reads 16 values from Cr and 16 from Ci
 * (offsets WS(csr, k) / WS(csi, k), k = 0..15) and writes 16 values to
 * R0 and 16 to R1 (offsets WS(rs, k), k = 0..15).
 *
 * Parameters:
 *   R0, R1   output arrays; advanced by ovs after every iteration.
 *   Cr, Ci   input arrays (only read); advanced by ivs after every iteration.
 *   rs       stride handed to WS() for the R0/R1 stores.
 *   csr, csi strides handed to WS() for the Cr / Ci loads.
 *   v        iteration count; the loop body does not run when v <= 0.
 *   ivs, ovs pointer increments applied between iterations.
 *
 * NOTE(review): by FFTW naming this is presumably the size-32 backward
 * real-data type-III transform, with Cr/Ci holding real/imaginary inputs
 * and R0/R1 the even/odd real outputs -- confirm against
 * rdft/codelet-rdft.h.  E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * macros from the included headers and are not visible here; FMA(a, b, c)
 * is presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- verify.
 */
Chris@82 37 static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants; the name encodes the leading digits of the value
 * (e.g. KP414213562 is sqrt(2) - 1, KP707106781 is 1/sqrt(2)). */
Chris@82 39 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@82 40 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@82 41 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@82 42 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@82 43 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@82 44 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@82 45 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@82 46 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@82 47 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 48 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@82 49 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 50 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@82 51 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 52 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@82 53 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 54 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@82 55 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 56 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 57 {
Chris@82 58 INT i;
/* Batch loop: counts i down from v; the step expression also advances
 * the four data pointers to the next transform. */
Chris@82 59 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* Intermediates shared between the load stage and the output stage. */
Chris@82 60 E T7, T2i, T2E, Tz, T1e, T1I, T1Z, T1x, Te, T22, T2F, T2j, T1h, T1y, TK;
Chris@82 61 E T1J, Tm, T2B, TW, T1k, T1C, T1M, T28, T2m, Tt, T2A, T17, T1j, T1F, T1L;
Chris@82 62 E T2d, T2l;
/* Load stage (next four blocks): inputs are read in pairs with indices
 * k and 15 - k, and each pair is reduced to a sum and a difference. */
/* Inputs 0/15 and 8/7. */
Chris@82 63 {
Chris@82 64 E T3, Tv, T1d, T2g, T6, T1a, Ty, T2h;
Chris@82 65 {
Chris@82 66 E T1, T2, T1b, T1c;
Chris@82 67 T1 = Cr[0];
Chris@82 68 T2 = Cr[WS(csr, 15)];
Chris@82 69 T3 = T1 + T2;
Chris@82 70 Tv = T1 - T2;
Chris@82 71 T1b = Ci[0];
Chris@82 72 T1c = Ci[WS(csi, 15)];
Chris@82 73 T1d = T1b + T1c;
Chris@82 74 T2g = T1c - T1b;
Chris@82 75 }
Chris@82 76 {
Chris@82 77 E T4, T5, Tw, Tx;
Chris@82 78 T4 = Cr[WS(csr, 8)];
Chris@82 79 T5 = Cr[WS(csr, 7)];
Chris@82 80 T6 = T4 + T5;
Chris@82 81 T1a = T4 - T5;
Chris@82 82 Tw = Ci[WS(csi, 8)];
Chris@82 83 Tx = Ci[WS(csi, 7)];
Chris@82 84 Ty = Tw + Tx;
Chris@82 85 T2h = Tx - Tw;
Chris@82 86 }
Chris@82 87 T7 = T3 + T6;
Chris@82 88 T2i = T2g - T2h;
Chris@82 89 T2E = T2h + T2g;
Chris@82 90 Tz = Tv - Ty;
Chris@82 91 T1e = T1a + T1d;
Chris@82 92 T1I = T1a - T1d;
Chris@82 93 T1Z = T3 - T6;
Chris@82 94 T1x = Tv + Ty;
Chris@82 95 }
/* Inputs 4/11 and 3/12. */
Chris@82 96 {
Chris@82 97 E Ta, TA, TD, T20, Td, TF, TI, T21;
Chris@82 98 {
Chris@82 99 E T8, T9, TB, TC;
Chris@82 100 T8 = Cr[WS(csr, 4)];
Chris@82 101 T9 = Cr[WS(csr, 11)];
Chris@82 102 Ta = T8 + T9;
Chris@82 103 TA = T8 - T9;
Chris@82 104 TB = Ci[WS(csi, 4)];
Chris@82 105 TC = Ci[WS(csi, 11)];
Chris@82 106 TD = TB + TC;
Chris@82 107 T20 = TC - TB;
Chris@82 108 }
Chris@82 109 {
Chris@82 110 E Tb, Tc, TG, TH;
Chris@82 111 Tb = Cr[WS(csr, 3)];
Chris@82 112 Tc = Cr[WS(csr, 12)];
Chris@82 113 Td = Tb + Tc;
Chris@82 114 TF = Tb - Tc;
Chris@82 115 TG = Ci[WS(csi, 3)];
Chris@82 116 TH = Ci[WS(csi, 12)];
Chris@82 117 TI = TG + TH;
Chris@82 118 T21 = TG - TH;
Chris@82 119 }
Chris@82 120 Te = Ta + Td;
Chris@82 121 T22 = T20 - T21;
Chris@82 122 T2F = T20 + T21;
Chris@82 123 T2j = Ta - Td;
Chris@82 124 {
Chris@82 125 E T1f, T1g, TE, TJ;
Chris@82 126 T1f = TF + TI;
Chris@82 127 T1g = TA + TD;
Chris@82 128 T1h = T1f - T1g;
Chris@82 129 T1y = T1g + T1f;
Chris@82 130 TE = TA - TD;
Chris@82 131 TJ = TF - TI;
Chris@82 132 TK = TE + TJ;
Chris@82 133 T1J = TE - TJ;
Chris@82 134 }
Chris@82 135 }
/* Inputs 2/13 and 10/5. */
Chris@82 136 {
Chris@82 137 E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV;
Chris@82 138 {
Chris@82 139 E Tg, Th, TS, TT;
Chris@82 140 Tg = Cr[WS(csr, 2)];
Chris@82 141 Th = Cr[WS(csr, 13)];
Chris@82 142 Ti = Tg + Th;
Chris@82 143 TM = Tg - Th;
Chris@82 144 TS = Ci[WS(csi, 2)];
Chris@82 145 TT = Ci[WS(csi, 13)];
Chris@82 146 TU = TS + TT;
Chris@82 147 T25 = TS - TT;
Chris@82 148 }
Chris@82 149 {
Chris@82 150 E Tj, Tk, TN, TO;
Chris@82 151 Tj = Cr[WS(csr, 10)];
Chris@82 152 Tk = Cr[WS(csr, 5)];
Chris@82 153 Tl = Tj + Tk;
Chris@82 154 TR = Tj - Tk;
Chris@82 155 TN = Ci[WS(csi, 10)];
Chris@82 156 TO = Ci[WS(csi, 5)];
Chris@82 157 TP = TN + TO;
Chris@82 158 T26 = TN - TO;
Chris@82 159 }
Chris@82 160 Tm = Ti + Tl;
Chris@82 161 T2B = T26 + T25;
Chris@82 162 TQ = TM - TP;
Chris@82 163 TV = TR + TU;
Chris@82 164 TW = FNMS(KP414213562, TV, TQ);
Chris@82 165 T1k = FMA(KP414213562, TQ, TV);
Chris@82 166 {
Chris@82 167 E T1A, T1B, T24, T27;
Chris@82 168 T1A = TR - TU;
Chris@82 169 T1B = TM + TP;
Chris@82 170 T1C = FMA(KP414213562, T1B, T1A);
Chris@82 171 T1M = FNMS(KP414213562, T1A, T1B);
Chris@82 172 T24 = Ti - Tl;
Chris@82 173 T27 = T25 - T26;
Chris@82 174 T28 = T24 - T27;
Chris@82 175 T2m = T24 + T27;
Chris@82 176 }
Chris@82 177 }
/* Inputs 1/14 and 6/9. */
Chris@82 178 {
Chris@82 179 E Tp, TX, T14, T2a, Ts, T15, T10, T2b, T11, T16;
Chris@82 180 {
Chris@82 181 E Tn, To, T12, T13;
Chris@82 182 Tn = Cr[WS(csr, 1)];
Chris@82 183 To = Cr[WS(csr, 14)];
Chris@82 184 Tp = Tn + To;
Chris@82 185 TX = Tn - To;
Chris@82 186 T12 = Ci[WS(csi, 1)];
Chris@82 187 T13 = Ci[WS(csi, 14)];
Chris@82 188 T14 = T12 + T13;
Chris@82 189 T2a = T13 - T12;
Chris@82 190 }
Chris@82 191 {
Chris@82 192 E Tq, Tr, TY, TZ;
Chris@82 193 Tq = Cr[WS(csr, 6)];
Chris@82 194 Tr = Cr[WS(csr, 9)];
Chris@82 195 Ts = Tq + Tr;
Chris@82 196 T15 = Tq - Tr;
Chris@82 197 TY = Ci[WS(csi, 6)];
Chris@82 198 TZ = Ci[WS(csi, 9)];
Chris@82 199 T10 = TY + TZ;
Chris@82 200 T2b = TY - TZ;
Chris@82 201 }
Chris@82 202 Tt = Tp + Ts;
Chris@82 203 T2A = T2b + T2a;
Chris@82 204 T11 = TX - T10;
Chris@82 205 T16 = T14 - T15;
Chris@82 206 T17 = FNMS(KP414213562, T16, T11);
Chris@82 207 T1j = FMA(KP414213562, T11, T16);
Chris@82 208 {
Chris@82 209 E T1D, T1E, T29, T2c;
Chris@82 210 T1D = T15 + T14;
Chris@82 211 T1E = TX + T10;
Chris@82 212 T1F = FNMS(KP414213562, T1E, T1D);
Chris@82 213 T1L = FMA(KP414213562, T1D, T1E);
Chris@82 214 T29 = Tp - Ts;
Chris@82 215 T2c = T2a - T2b;
Chris@82 216 T2d = T29 + T2c;
Chris@82 217 T2l = T29 - T2c;
Chris@82 218 }
Chris@82 219 }
/* Output stage (next eight blocks): each block combines the shared
 * intermediates and stores four results. */
/* R0 outputs 0, 8, 4, 12. */
Chris@82 220 {
Chris@82 221 E Tf, Tu, T2L, T2M, T2N, T2O;
Chris@82 222 Tf = T7 + Te;
Chris@82 223 Tu = Tm + Tt;
Chris@82 224 T2L = Tf - Tu;
Chris@82 225 T2M = T2B + T2A;
Chris@82 226 T2N = T2F + T2E;
Chris@82 227 T2O = T2M + T2N;
Chris@82 228 R0[0] = KP2_000000000 * (Tf + Tu);
Chris@82 229 R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M);
Chris@82 230 R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O);
Chris@82 231 R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L);
Chris@82 232 }
/* R0 outputs 3, 15, 11, 7. */
Chris@82 233 {
Chris@82 234 E T2t, T2y, T2w, T2x;
Chris@82 235 {
Chris@82 236 E T2r, T2s, T2u, T2v;
Chris@82 237 T2r = T1Z - T22;
Chris@82 238 T2s = T2m + T2l;
Chris@82 239 T2t = FNMS(KP707106781, T2s, T2r);
Chris@82 240 T2y = FMA(KP707106781, T2s, T2r);
Chris@82 241 T2u = T2j + T2i;
Chris@82 242 T2v = T28 - T2d;
Chris@82 243 T2w = FNMS(KP707106781, T2v, T2u);
Chris@82 244 T2x = FMA(KP707106781, T2v, T2u);
Chris@82 245 }
Chris@82 246 R0[WS(rs, 3)] = KP1_662939224 * (FMA(KP668178637, T2w, T2t));
Chris@82 247 R0[WS(rs, 15)] = -(KP1_961570560 * (FNMS(KP198912367, T2x, T2y)));
Chris@82 248 R0[WS(rs, 11)] = KP1_662939224 * (FNMS(KP668178637, T2t, T2w));
Chris@82 249 R0[WS(rs, 7)] = KP1_961570560 * (FMA(KP198912367, T2y, T2x));
Chris@82 250 }
/* R0 outputs 2, 14, 10, 6. */
Chris@82 251 {
Chris@82 252 E T2D, T2K, T2I, T2J;
Chris@82 253 {
Chris@82 254 E T2z, T2C, T2G, T2H;
Chris@82 255 T2z = T7 - Te;
Chris@82 256 T2C = T2A - T2B;
Chris@82 257 T2D = T2z + T2C;
Chris@82 258 T2K = T2z - T2C;
Chris@82 259 T2G = T2E - T2F;
Chris@82 260 T2H = Tm - Tt;
Chris@82 261 T2I = T2G - T2H;
Chris@82 262 T2J = T2H + T2G;
Chris@82 263 }
Chris@82 264 R0[WS(rs, 2)] = KP1_847759065 * (FMA(KP414213562, T2I, T2D));
Chris@82 265 R0[WS(rs, 14)] = -(KP1_847759065 * (FNMS(KP414213562, T2J, T2K)));
Chris@82 266 R0[WS(rs, 10)] = KP1_847759065 * (FNMS(KP414213562, T2D, T2I));
Chris@82 267 R0[WS(rs, 6)] = KP1_847759065 * (FMA(KP414213562, T2K, T2J));
Chris@82 268 }
/* R1 outputs 0, 12, 8, 4. */
Chris@82 269 {
Chris@82 270 E T19, T1o, T1m, T1n;
Chris@82 271 {
Chris@82 272 E TL, T18, T1i, T1l;
Chris@82 273 TL = FMA(KP707106781, TK, Tz);
Chris@82 274 T18 = TW + T17;
Chris@82 275 T19 = FMA(KP923879532, T18, TL);
Chris@82 276 T1o = FNMS(KP923879532, T18, TL);
Chris@82 277 T1i = FNMS(KP707106781, T1h, T1e);
Chris@82 278 T1l = T1j - T1k;
Chris@82 279 T1m = FNMS(KP923879532, T1l, T1i);
Chris@82 280 T1n = FMA(KP923879532, T1l, T1i);
Chris@82 281 }
Chris@82 282 R1[0] = KP1_990369453 * (FNMS(KP098491403, T1m, T19));
Chris@82 283 R1[WS(rs, 12)] = -(KP1_546020906 * (FMA(KP820678790, T1n, T1o)));
Chris@82 284 R1[WS(rs, 8)] = -(KP1_990369453 * (FMA(KP098491403, T19, T1m)));
Chris@82 285 R1[WS(rs, 4)] = -(KP1_546020906 * (FNMS(KP820678790, T1o, T1n)));
Chris@82 286 }
/* R1 outputs 2, 14, 10, 6. */
Chris@82 287 {
Chris@82 288 E T1r, T1w, T1u, T1v;
Chris@82 289 {
Chris@82 290 E T1p, T1q, T1s, T1t;
Chris@82 291 T1p = FNMS(KP707106781, TK, Tz);
Chris@82 292 T1q = T1k + T1j;
Chris@82 293 T1r = FNMS(KP923879532, T1q, T1p);
Chris@82 294 T1w = FMA(KP923879532, T1q, T1p);
Chris@82 295 T1s = FMA(KP707106781, T1h, T1e);
Chris@82 296 T1t = TW - T17;
Chris@82 297 T1u = FMA(KP923879532, T1t, T1s);
Chris@82 298 T1v = FNMS(KP923879532, T1t, T1s);
Chris@82 299 }
Chris@82 300 R1[WS(rs, 2)] = KP1_763842528 * (FNMS(KP534511135, T1u, T1r));
Chris@82 301 R1[WS(rs, 14)] = -(KP1_913880671 * (FMA(KP303346683, T1v, T1w)));
Chris@82 302 R1[WS(rs, 10)] = -(KP1_763842528 * (FMA(KP534511135, T1r, T1u)));
Chris@82 303 R1[WS(rs, 6)] = -(KP1_913880671 * (FNMS(KP303346683, T1w, T1v)));
Chris@82 304 }
/* R1 outputs 3, 15, 11, 7. */
Chris@82 305 {
Chris@82 306 E T1T, T1Y, T1W, T1X;
Chris@82 307 {
Chris@82 308 E T1R, T1S, T1U, T1V;
Chris@82 309 T1R = FMA(KP707106781, T1y, T1x);
Chris@82 310 T1S = T1M + T1L;
Chris@82 311 T1T = FNMS(KP923879532, T1S, T1R);
Chris@82 312 T1Y = FMA(KP923879532, T1S, T1R);
Chris@82 313 T1U = FMA(KP707106781, T1J, T1I);
Chris@82 314 T1V = T1C + T1F;
Chris@82 315 T1W = FNMS(KP923879532, T1V, T1U);
Chris@82 316 T1X = FMA(KP923879532, T1V, T1U);
Chris@82 317 }
Chris@82 318 R1[WS(rs, 3)] = KP1_546020906 * (FMA(KP820678790, T1W, T1T));
Chris@82 319 R1[WS(rs, 15)] = -(KP1_990369453 * (FNMS(KP098491403, T1X, T1Y)));
Chris@82 320 R1[WS(rs, 11)] = KP1_546020906 * (FNMS(KP820678790, T1T, T1W));
Chris@82 321 R1[WS(rs, 7)] = KP1_990369453 * (FMA(KP098491403, T1Y, T1X));
Chris@82 322 }
/* R0 outputs 1, 13, 9, 5. */
Chris@82 323 {
Chris@82 324 E T2f, T2q, T2o, T2p;
Chris@82 325 {
Chris@82 326 E T23, T2e, T2k, T2n;
Chris@82 327 T23 = T1Z + T22;
Chris@82 328 T2e = T28 + T2d;
Chris@82 329 T2f = FMA(KP707106781, T2e, T23);
Chris@82 330 T2q = FNMS(KP707106781, T2e, T23);
Chris@82 331 T2k = T2i - T2j;
Chris@82 332 T2n = T2l - T2m;
Chris@82 333 T2o = FMA(KP707106781, T2n, T2k);
Chris@82 334 T2p = FNMS(KP707106781, T2n, T2k);
Chris@82 335 }
Chris@82 336 R0[WS(rs, 1)] = KP1_961570560 * (FMA(KP198912367, T2o, T2f));
Chris@82 337 R0[WS(rs, 13)] = -(KP1_662939224 * (FNMS(KP668178637, T2p, T2q)));
Chris@82 338 R0[WS(rs, 9)] = KP1_961570560 * (FNMS(KP198912367, T2f, T2o));
Chris@82 339 R0[WS(rs, 5)] = KP1_662939224 * (FMA(KP668178637, T2q, T2p));
Chris@82 340 }
/* R1 outputs 1, 13, 9, 5. */
Chris@82 341 {
Chris@82 342 E T1H, T1Q, T1O, T1P;
Chris@82 343 {
Chris@82 344 E T1z, T1G, T1K, T1N;
Chris@82 345 T1z = FNMS(KP707106781, T1y, T1x);
Chris@82 346 T1G = T1C - T1F;
Chris@82 347 T1H = FMA(KP923879532, T1G, T1z);
Chris@82 348 T1Q = FNMS(KP923879532, T1G, T1z);
Chris@82 349 T1K = FNMS(KP707106781, T1J, T1I);
Chris@82 350 T1N = T1L - T1M;
Chris@82 351 T1O = FMA(KP923879532, T1N, T1K);
Chris@82 352 T1P = FNMS(KP923879532, T1N, T1K);
Chris@82 353 }
Chris@82 354 R1[WS(rs, 1)] = KP1_913880671 * (FMA(KP303346683, T1O, T1H));
Chris@82 355 R1[WS(rs, 13)] = -(KP1_763842528 * (FNMS(KP534511135, T1P, T1Q)));
Chris@82 356 R1[WS(rs, 9)] = KP1_913880671 * (FNMS(KP303346683, T1H, T1O));
Chris@82 357 R1[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T1Q, T1P));
Chris@82 358 }
Chris@82 359 }
Chris@82 360 }
Chris@82 361 }
Chris@82 362
/*
 * Descriptor handed to the registration call below for the FMA variant.
 * The literal 32 matches the generator's "-n 32" option, the string is the
 * codelet name, and {106, 32, 68, 0} repeats the addition / multiplication /
 * fused multiply-add counts quoted in this variant's header comment.
 * NOTE(review): the exact field layout of kr2c_desc and the meaning of
 * GENUS come from headers not shown here -- confirm.
 */
Chris@82 363 static const kr2c_desc desc = { 32, "r2cbIII_32", {106, 32, 68, 0}, &GENUS };
Chris@82 364
/*
 * Registration entry point (FMA variant): forwards the planner p, the
 * r2cbIII_32 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro from the FFTW headers, presumably a
 * name-prefixing wrapper -- not visible here, confirm.
 */
Chris@82 365 void X(codelet_r2cbIII_32) (planner *p) {
Chris@82 366 X(kr2c_register) (p, r2cbIII_32, &desc);
Chris@82 367 }
Chris@82 368
Chris@82 369 #else
Chris@82 370
Chris@82 371 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cbIII_32 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 372
Chris@82 373 /*
Chris@82 374 * This function contains 174 FP additions, 84 FP multiplications,
Chris@82 375 * (or, 138 additions, 48 multiplications, 36 fused multiply/add),
Chris@82 376 * 66 stack variables, 19 constants, and 64 memory accesses
Chris@82 377 */
Chris@82 378 #include "rdft/scalar/r2cbIII.h"
Chris@82 379
/*
 * r2cbIII_32 -- non-FMA variant (this branch is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated straight-line kernel (see the "Generated by" line
 * above: -n 32, -sign 1, -dft-III).  Do not hand-edit the arithmetic.
 *
 * Per loop iteration the code reads 16 values from Cr and 16 from Ci
 * (offsets WS(csr, k) / WS(csi, k), k = 0..15) and writes 16 values to
 * R0 and 16 to R1 (offsets WS(rs, k), k = 0..15).
 *
 * Parameters:
 *   R0, R1   output arrays; advanced by ovs after every iteration.
 *   Cr, Ci   input arrays (only read); advanced by ivs after every iteration.
 *   rs       stride handed to WS() for the R0/R1 stores.
 *   csr, csi strides handed to WS() for the Cr / Ci loads.
 *   v        iteration count; the loop body does not run when v <= 0.
 *   ivs, ovs pointer increments applied between iterations.
 *
 * NOTE(review): by FFTW naming this is presumably the size-32 backward
 * real-data type-III transform, with Cr/Ci holding real/imaginary inputs
 * and R0/R1 the even/odd real outputs -- confirm against
 * rdft/codelet-rdft.h.  E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * macros from the included headers and are not visible here; FMA(a, b, c)
 * is presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- verify.
 */
Chris@82 380 static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 381 {
/* Numeric constants; the name encodes the leading digits of the value
 * (e.g. KP707106781 is 1/sqrt(2), KP1_414213562 is sqrt(2)). */
Chris@82 382 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@82 383 DK(KP580569354, +0.580569354508924735272384751634790549382952557);
Chris@82 384 DK(KP942793473, +0.942793473651995297112775251810508755314920638);
Chris@82 385 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@82 386 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@82 387 DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
Chris@82 388 DK(KP196034280, +0.196034280659121203988391127777283691722273346);
Chris@82 389 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@82 390 DK(KP765366864, +0.765366864730179543456919968060797733522689125);
Chris@82 391 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@82 392 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@82 393 DK(KP390180644, +0.390180644032256535696569736954044481855383236);
Chris@82 394 DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
Chris@82 395 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@82 396 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@82 397 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 398 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 399 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 400 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 401 {
Chris@82 402 INT i;
/* Batch loop: counts i down from v; the step expression also advances
 * the four data pointers to the next transform. */
Chris@82 403 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* Intermediates shared between the load stage and the output stage. */
Chris@82 404 E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK;
Chris@82 405 E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M;
Chris@82 406 E T2d, T2m;
/* Load stage (next four blocks): inputs are read in pairs with indices
 * k and 15 - k, and each pair is reduced to a sum and a difference. */
/* Inputs 0/15 and 8/7. */
Chris@82 407 {
Chris@82 408 E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g;
Chris@82 409 {
Chris@82 410 E T1, T2, T1h, T1i;
Chris@82 411 T1 = Cr[0];
Chris@82 412 T2 = Cr[WS(csr, 15)];
Chris@82 413 T3 = T1 + T2;
Chris@82 414 Tv = T1 - T2;
Chris@82 415 T1h = Ci[0];
Chris@82 416 T1i = Ci[WS(csi, 15)];
Chris@82 417 T1j = T1h + T1i;
Chris@82 418 T2h = T1i - T1h;
Chris@82 419 }
Chris@82 420 {
Chris@82 421 E T4, T5, Tw, Tx;
Chris@82 422 T4 = Cr[WS(csr, 8)];
Chris@82 423 T5 = Cr[WS(csr, 7)];
Chris@82 424 T6 = T4 + T5;
Chris@82 425 T1g = T4 - T5;
Chris@82 426 Tw = Ci[WS(csi, 8)];
Chris@82 427 Tx = Ci[WS(csi, 7)];
Chris@82 428 Ty = Tw + Tx;
Chris@82 429 T2g = Tw - Tx;
Chris@82 430 }
Chris@82 431 T7 = T3 + T6;
Chris@82 432 T2i = T2g + T2h;
Chris@82 433 T2F = T2h - T2g;
Chris@82 434 Tz = Tv - Ty;
Chris@82 435 T1k = T1g + T1j;
Chris@82 436 T1I = T1g - T1j;
Chris@82 437 T1Z = T3 - T6;
Chris@82 438 T1x = Tv + Ty;
Chris@82 439 }
/* Inputs 4/11 and 3/12. */
Chris@82 440 {
Chris@82 441 E Ta, TA, TD, T21, Td, TF, TI, T20;
Chris@82 442 {
Chris@82 443 E T8, T9, TB, TC;
Chris@82 444 T8 = Cr[WS(csr, 4)];
Chris@82 445 T9 = Cr[WS(csr, 11)];
Chris@82 446 Ta = T8 + T9;
Chris@82 447 TA = T8 - T9;
Chris@82 448 TB = Ci[WS(csi, 4)];
Chris@82 449 TC = Ci[WS(csi, 11)];
Chris@82 450 TD = TB + TC;
Chris@82 451 T21 = TB - TC;
Chris@82 452 }
Chris@82 453 {
Chris@82 454 E Tb, Tc, TG, TH;
Chris@82 455 Tb = Cr[WS(csr, 3)];
Chris@82 456 Tc = Cr[WS(csr, 12)];
Chris@82 457 Td = Tb + Tc;
Chris@82 458 TF = Tb - Tc;
Chris@82 459 TG = Ci[WS(csi, 3)];
Chris@82 460 TH = Ci[WS(csi, 12)];
Chris@82 461 TI = TG + TH;
Chris@82 462 T20 = TH - TG;
Chris@82 463 }
Chris@82 464 Te = Ta + Td;
Chris@82 465 T22 = T20 - T21;
Chris@82 466 T2E = T21 + T20;
Chris@82 467 T2j = Ta - Td;
Chris@82 468 {
Chris@82 469 E T1d, T1e, TE, TJ;
Chris@82 470 T1d = TA + TD;
Chris@82 471 T1e = TF + TI;
Chris@82 472 T1f = KP707106781 * (T1d - T1e);
Chris@82 473 T1y = KP707106781 * (T1d + T1e);
Chris@82 474 TE = TA - TD;
Chris@82 475 TJ = TF - TI;
Chris@82 476 TK = KP707106781 * (TE + TJ);
Chris@82 477 T1J = KP707106781 * (TE - TJ);
Chris@82 478 }
Chris@82 479 }
/* Inputs 2/13 and 10/5. */
Chris@82 480 {
Chris@82 481 E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV;
Chris@82 482 {
Chris@82 483 E Tg, Th, TS, TT;
Chris@82 484 Tg = Cr[WS(csr, 2)];
Chris@82 485 Th = Cr[WS(csr, 13)];
Chris@82 486 Ti = Tg + Th;
Chris@82 487 TM = Tg - Th;
Chris@82 488 TS = Ci[WS(csi, 2)];
Chris@82 489 TT = Ci[WS(csi, 13)];
Chris@82 490 TU = TS + TT;
Chris@82 491 T25 = TS - TT;
Chris@82 492 }
Chris@82 493 {
Chris@82 494 E Tj, Tk, TN, TO;
Chris@82 495 Tj = Cr[WS(csr, 10)];
Chris@82 496 Tk = Cr[WS(csr, 5)];
Chris@82 497 Tl = Tj + Tk;
Chris@82 498 TR = Tj - Tk;
Chris@82 499 TN = Ci[WS(csi, 10)];
Chris@82 500 TO = Ci[WS(csi, 5)];
Chris@82 501 TP = TN + TO;
Chris@82 502 T26 = TN - TO;
Chris@82 503 }
Chris@82 504 Tm = Ti + Tl;
Chris@82 505 T2B = T26 + T25;
Chris@82 506 TQ = TM - TP;
Chris@82 507 TV = TR + TU;
Chris@82 508 TW = FNMS(KP382683432, TV, KP923879532 * TQ);
Chris@82 509 T1a = FMA(KP382683432, TQ, KP923879532 * TV);
Chris@82 510 {
Chris@82 511 E T1A, T1B, T24, T27;
Chris@82 512 T1A = TM + TP;
Chris@82 513 T1B = TU - TR;
Chris@82 514 T1C = FNMS(KP923879532, T1B, KP382683432 * T1A);
Chris@82 515 T1L = FMA(KP923879532, T1A, KP382683432 * T1B);
Chris@82 516 T24 = Ti - Tl;
Chris@82 517 T27 = T25 - T26;
Chris@82 518 T28 = T24 - T27;
Chris@82 519 T2l = T24 + T27;
Chris@82 520 }
Chris@82 521 }
/* Inputs 1/14 and 6/9. */
Chris@82 522 {
Chris@82 523 E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16;
Chris@82 524 {
Chris@82 525 E Tn, To, T13, T14;
Chris@82 526 Tn = Cr[WS(csr, 1)];
Chris@82 527 To = Cr[WS(csr, 14)];
Chris@82 528 Tp = Tn + To;
Chris@82 529 TX = Tn - To;
Chris@82 530 T13 = Ci[WS(csi, 1)];
Chris@82 531 T14 = Ci[WS(csi, 14)];
Chris@82 532 T15 = T13 + T14;
Chris@82 533 T2a = T14 - T13;
Chris@82 534 }
Chris@82 535 {
Chris@82 536 E Tq, Tr, TY, TZ;
Chris@82 537 Tq = Cr[WS(csr, 6)];
Chris@82 538 Tr = Cr[WS(csr, 9)];
Chris@82 539 Ts = Tq + Tr;
Chris@82 540 T12 = Tq - Tr;
Chris@82 541 TY = Ci[WS(csi, 6)];
Chris@82 542 TZ = Ci[WS(csi, 9)];
Chris@82 543 T10 = TY + TZ;
Chris@82 544 T2b = TY - TZ;
Chris@82 545 }
Chris@82 546 Tt = Tp + Ts;
Chris@82 547 T2A = T2b + T2a;
Chris@82 548 T11 = TX - T10;
Chris@82 549 T16 = T12 - T15;
Chris@82 550 T17 = FMA(KP923879532, T11, KP382683432 * T16);
Chris@82 551 T1b = FNMS(KP382683432, T11, KP923879532 * T16);
Chris@82 552 {
Chris@82 553 E T1D, T1E, T29, T2c;
Chris@82 554 T1D = TX + T10;
Chris@82 555 T1E = T12 + T15;
Chris@82 556 T1F = FNMS(KP923879532, T1E, KP382683432 * T1D);
Chris@82 557 T1M = FMA(KP923879532, T1D, KP382683432 * T1E);
Chris@82 558 T29 = Tp - Ts;
Chris@82 559 T2c = T2a - T2b;
Chris@82 560 T2d = T29 + T2c;
Chris@82 561 T2m = T2c - T29;
Chris@82 562 }
Chris@82 563 }
/* Output stage (next eight blocks): each block combines the shared
 * intermediates and stores four results. */
/* R0 outputs 0, 8, 4, 12. */
Chris@82 564 {
Chris@82 565 E Tf, Tu, T2L, T2M, T2N, T2O;
Chris@82 566 Tf = T7 + Te;
Chris@82 567 Tu = Tm + Tt;
Chris@82 568 T2L = Tf - Tu;
Chris@82 569 T2M = T2B + T2A;
Chris@82 570 T2N = T2F - T2E;
Chris@82 571 T2O = T2M + T2N;
Chris@82 572 R0[0] = KP2_000000000 * (Tf + Tu);
Chris@82 573 R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M);
Chris@82 574 R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O);
Chris@82 575 R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L);
Chris@82 576 }
/* R0 outputs 3, 15, 11, 7. */
Chris@82 577 {
Chris@82 578 E T2t, T2x, T2w, T2y;
Chris@82 579 {
Chris@82 580 E T2r, T2s, T2u, T2v;
Chris@82 581 T2r = T1Z - T22;
Chris@82 582 T2s = KP707106781 * (T2m - T2l);
Chris@82 583 T2t = T2r + T2s;
Chris@82 584 T2x = T2r - T2s;
Chris@82 585 T2u = T2j + T2i;
Chris@82 586 T2v = KP707106781 * (T28 - T2d);
Chris@82 587 T2w = T2u - T2v;
Chris@82 588 T2y = T2v + T2u;
Chris@82 589 }
Chris@82 590 R0[WS(rs, 3)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w);
Chris@82 591 R0[WS(rs, 15)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y);
Chris@82 592 R0[WS(rs, 11)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w);
Chris@82 593 R0[WS(rs, 7)] = FMA(KP390180644, T2x, KP1_961570560 * T2y);
Chris@82 594 }
/* R0 outputs 2, 14, 10, 6. */
Chris@82 595 {
Chris@82 596 E T2D, T2J, T2I, T2K;
Chris@82 597 {
Chris@82 598 E T2z, T2C, T2G, T2H;
Chris@82 599 T2z = T7 - Te;
Chris@82 600 T2C = T2A - T2B;
Chris@82 601 T2D = T2z + T2C;
Chris@82 602 T2J = T2z - T2C;
Chris@82 603 T2G = T2E + T2F;
Chris@82 604 T2H = Tm - Tt;
Chris@82 605 T2I = T2G - T2H;
Chris@82 606 T2K = T2H + T2G;
Chris@82 607 }
Chris@82 608 R0[WS(rs, 2)] = FMA(KP1_847759065, T2D, KP765366864 * T2I);
Chris@82 609 R0[WS(rs, 14)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K);
Chris@82 610 R0[WS(rs, 10)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I);
Chris@82 611 R0[WS(rs, 6)] = FMA(KP765366864, T2J, KP1_847759065 * T2K);
Chris@82 612 }
/* R1 outputs 0, 12, 8, 4. */
Chris@82 613 {
Chris@82 614 E T19, T1n, T1m, T1o;
Chris@82 615 {
Chris@82 616 E TL, T18, T1c, T1l;
Chris@82 617 TL = Tz + TK;
Chris@82 618 T18 = TW + T17;
Chris@82 619 T19 = TL + T18;
Chris@82 620 T1n = TL - T18;
Chris@82 621 T1c = T1a + T1b;
Chris@82 622 T1l = T1f + T1k;
Chris@82 623 T1m = T1c + T1l;
Chris@82 624 T1o = T1c - T1l;
Chris@82 625 }
Chris@82 626 R1[0] = FNMS(KP196034280, T1m, KP1_990369453 * T19);
Chris@82 627 R1[WS(rs, 12)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o);
Chris@82 628 R1[WS(rs, 8)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m));
Chris@82 629 R1[WS(rs, 4)] = FMA(KP1_268786568, T1n, KP1_546020906 * T1o);
Chris@82 630 }
/* R1 outputs 2, 14, 10, 6. */
Chris@82 631 {
Chris@82 632 E T1r, T1v, T1u, T1w;
Chris@82 633 {
Chris@82 634 E T1p, T1q, T1s, T1t;
Chris@82 635 T1p = Tz - TK;
Chris@82 636 T1q = T1b - T1a;
Chris@82 637 T1r = T1p + T1q;
Chris@82 638 T1v = T1p - T1q;
Chris@82 639 T1s = T1f - T1k;
Chris@82 640 T1t = TW - T17;
Chris@82 641 T1u = T1s - T1t;
Chris@82 642 T1w = T1t + T1s;
Chris@82 643 }
Chris@82 644 R1[WS(rs, 2)] = FMA(KP1_763842528, T1r, KP942793473 * T1u);
Chris@82 645 R1[WS(rs, 14)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w);
Chris@82 646 R1[WS(rs, 10)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u);
Chris@82 647 R1[WS(rs, 6)] = FMA(KP580569354, T1v, KP1_913880671 * T1w);
Chris@82 648 }
/* R1 outputs 3, 15, 11, 7. */
Chris@82 649 {
Chris@82 650 E T1T, T1X, T1W, T1Y;
Chris@82 651 {
Chris@82 652 E T1R, T1S, T1U, T1V;
Chris@82 653 T1R = T1x + T1y;
Chris@82 654 T1S = T1L + T1M;
Chris@82 655 T1T = T1R - T1S;
Chris@82 656 T1X = T1R + T1S;
Chris@82 657 T1U = T1J + T1I;
Chris@82 658 T1V = T1C - T1F;
Chris@82 659 T1W = T1U - T1V;
Chris@82 660 T1Y = T1V + T1U;
Chris@82 661 }
Chris@82 662 R1[WS(rs, 3)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W);
Chris@82 663 R1[WS(rs, 15)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y);
Chris@82 664 R1[WS(rs, 11)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W);
Chris@82 665 R1[WS(rs, 7)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y);
Chris@82 666 }
/* R0 outputs 1, 13, 9, 5. */
Chris@82 667 {
Chris@82 668 E T2f, T2p, T2o, T2q;
Chris@82 669 {
Chris@82 670 E T23, T2e, T2k, T2n;
Chris@82 671 T23 = T1Z + T22;
Chris@82 672 T2e = KP707106781 * (T28 + T2d);
Chris@82 673 T2f = T23 + T2e;
Chris@82 674 T2p = T23 - T2e;
Chris@82 675 T2k = T2i - T2j;
Chris@82 676 T2n = KP707106781 * (T2l + T2m);
Chris@82 677 T2o = T2k - T2n;
Chris@82 678 T2q = T2n + T2k;
Chris@82 679 }
Chris@82 680 R0[WS(rs, 1)] = FMA(KP1_961570560, T2f, KP390180644 * T2o);
Chris@82 681 R0[WS(rs, 13)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q);
Chris@82 682 R0[WS(rs, 9)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o);
Chris@82 683 R0[WS(rs, 5)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q);
Chris@82 684 }
/* R1 outputs 1, 13, 9, 5. */
Chris@82 685 {
Chris@82 686 E T1H, T1P, T1O, T1Q;
Chris@82 687 {
Chris@82 688 E T1z, T1G, T1K, T1N;
Chris@82 689 T1z = T1x - T1y;
Chris@82 690 T1G = T1C + T1F;
Chris@82 691 T1H = T1z + T1G;
Chris@82 692 T1P = T1z - T1G;
Chris@82 693 T1K = T1I - T1J;
Chris@82 694 T1N = T1L - T1M;
Chris@82 695 T1O = T1K - T1N;
Chris@82 696 T1Q = T1N + T1K;
Chris@82 697 }
Chris@82 698 R1[WS(rs, 1)] = FMA(KP1_913880671, T1H, KP580569354 * T1O);
Chris@82 699 R1[WS(rs, 13)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q);
Chris@82 700 R1[WS(rs, 9)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O);
Chris@82 701 R1[WS(rs, 5)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q);
Chris@82 702 }
Chris@82 703 }
Chris@82 704 }
Chris@82 705 }
Chris@82 706
/*
 * Descriptor handed to the registration call below for the non-FMA variant.
 * The literal 32 matches the generator's "-n 32" option, the string is the
 * codelet name, and {138, 48, 36, 0} repeats the addition / multiplication /
 * fused multiply-add counts quoted in this variant's header comment.
 * NOTE(review): the exact field layout of kr2c_desc and the meaning of
 * GENUS come from headers not shown here -- confirm.
 */
Chris@82 707 static const kr2c_desc desc = { 32, "r2cbIII_32", {138, 48, 36, 0}, &GENUS };
Chris@82 708
/*
 * Registration entry point (non-FMA variant): forwards the planner p, the
 * r2cbIII_32 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro from the FFTW headers, presumably a
 * name-prefixing wrapper -- not visible here, confirm.
 */
Chris@82 709 void X(codelet_r2cbIII_32) (planner *p) {
Chris@82 710 X(kr2c_register) (p, r2cbIII_32, &desc);
Chris@82 711 }
Chris@82 712
Chris@82 713 #endif