annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:08 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cf_32 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 156 FP additions, 68 FP multiplications,
Chris@42 32 * (or, 88 additions, 0 multiplications, 68 fused multiply/add),
Chris@42 33 * 89 stack variables, 7 constants, and 64 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_32 (HAVE_FMA variant): machine-generated size-32 kernel (genfft,
 * "-n 32 -name r2cf_32"); do not hand-edit the arithmetic.
 *
 * Per loop iteration it reads 32 values -- R0[WS(rs, k)] and R1[WS(rs, k)]
 * for k = 0..15 -- and writes 32 values -- Cr[WS(csr, k)] for k = 0..16 and
 * Ci[WS(csi, k)] for k = 1..15.  Ci[0] and Ci[WS(csi, 16)] are never written
 * by this function.
 * NOTE(review): presumably R0/R1 hold the even/odd-indexed real input samples
 * and Cr/Ci the real/imaginary parts of the output bins -- confirm against
 * the r2cf codelet documentation.
 *
 * Parameters:
 *   R0, R1       input pointers; advanced by ivs after every iteration
 *   Cr, Ci       output pointers; advanced by ovs after every iteration
 *   rs           stride passed to WS() when indexing R0 and R1
 *   csr, csi     strides passed to WS() when indexing Cr and Ci respectively
 *   v            iteration count; the loop body runs v times (not at all
 *                when v <= 0)
 *   ivs, ovs     per-iteration pointer increments for inputs / outputs
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from the project headers and are not visible here (FMA/FNMS/FMS are
 * presumably fused multiply-add style macros -- TODO confirm exact sign
 * conventions in the header before reasoning about individual lines).
 */
Chris@42 37 static void r2cf_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/*
 * Constants, identified by their numeric values: 0.8314... = cos(3*pi/16),
 * 0.6681... = tan(3*pi/16), 0.9807... = cos(pi/16), 0.1989... = tan(pi/16),
 * 0.9238... = cos(pi/8), 0.7071... = sqrt(1/2), 0.4142... = tan(pi/8).
 */
Chris@42 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 40 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 41 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 44 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 45 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 46 {
Chris@42 47 INT i;
/* One pass of the body per transform; the increment clause steps all four pointers. */
Chris@42 48 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
/* These six temporaries are declared at loop scope because they are consumed by the final stores after the nested blocks close. */
Chris@42 49 E T1x, T1M, T1I, T1E, T1J, T1H;
Chris@42 50 {
Chris@42 51 E Tv, T1h, T7, T2b, Te, T2n, Ty, T1i, T1l, TF, T2d, Tt, T1k, TC, T2c;
Chris@42 52 E Tm, T2j, T1Z, T2k, T22, TK, T1B, T19, T1C, T1e, TO, TV, T1T, TN, TP;
Chris@42 53 E T2g, T1S;
Chris@42 54 {
Chris@42 55 E TD, Tp, Tq, Tr;
Chris@42 56 {
Chris@42 57 E T1, T2, T4, T5;
Chris@42 58 T1 = R0[0];
Chris@42 59 T2 = R0[WS(rs, 8)];
Chris@42 60 T4 = R0[WS(rs, 4)];
Chris@42 61 T5 = R0[WS(rs, 12)];
Chris@42 62 {
Chris@42 63 E Ta, Tw, Tx, Td, Tn, To;
Chris@42 64 {
Chris@42 65 E T8, T3, T6, T9, Tb, Tc;
Chris@42 66 T8 = R0[WS(rs, 2)];
Chris@42 67 Tv = T1 - T2;
Chris@42 68 T3 = T1 + T2;
Chris@42 69 T1h = T4 - T5;
Chris@42 70 T6 = T4 + T5;
Chris@42 71 T9 = R0[WS(rs, 10)];
Chris@42 72 Tb = R0[WS(rs, 14)];
Chris@42 73 Tc = R0[WS(rs, 6)];
Chris@42 74 T7 = T3 + T6;
Chris@42 75 T2b = T3 - T6;
Chris@42 76 Ta = T8 + T9;
Chris@42 77 Tw = T8 - T9;
Chris@42 78 Tx = Tb - Tc;
Chris@42 79 Td = Tb + Tc;
Chris@42 80 }
Chris@42 81 Tn = R0[WS(rs, 15)];
Chris@42 82 To = R0[WS(rs, 7)];
Chris@42 83 Te = Ta + Td;
Chris@42 84 T2n = Td - Ta;
Chris@42 85 Ty = Tw + Tx;
Chris@42 86 T1i = Tx - Tw;
Chris@42 87 TD = Tn - To;
Chris@42 88 Tp = Tn + To;
Chris@42 89 Tq = R0[WS(rs, 3)];
Chris@42 90 Tr = R0[WS(rs, 11)];
Chris@42 91 }
Chris@42 92 }
Chris@42 93 {
Chris@42 94 E Tj, TA, Ti, Tk;
Chris@42 95 {
Chris@42 96 E Tg, Th, TE, Ts;
Chris@42 97 Tg = R0[WS(rs, 1)];
Chris@42 98 Th = R0[WS(rs, 9)];
Chris@42 99 Tj = R0[WS(rs, 5)];
Chris@42 100 TE = Tq - Tr;
Chris@42 101 Ts = Tq + Tr;
Chris@42 102 TA = Tg - Th;
Chris@42 103 Ti = Tg + Th;
Chris@42 104 T1l = FNMS(KP414213562, TD, TE);
Chris@42 105 TF = FMA(KP414213562, TE, TD);
Chris@42 106 T2d = Tp - Ts;
Chris@42 107 Tt = Tp + Ts;
Chris@42 108 Tk = R0[WS(rs, 13)];
Chris@42 109 }
Chris@42 110 {
Chris@42 111 E T11, T15, T1c, T20, T14, T16, T1X, T1Y, T1Q, T1R;
Chris@42 112 {
Chris@42 113 E T1a, T1b, T12, T13;
Chris@42 114 {
Chris@42 115 E TZ, T10, TB, Tl;
Chris@42 116 TZ = R1[WS(rs, 15)];
Chris@42 117 T10 = R1[WS(rs, 7)];
Chris@42 118 T1a = R1[WS(rs, 11)];
Chris@42 119 TB = Tj - Tk;
Chris@42 120 Tl = Tj + Tk;
Chris@42 121 T1X = TZ + T10;
Chris@42 122 T11 = TZ - T10;
Chris@42 123 T1k = FMA(KP414213562, TA, TB);
Chris@42 124 TC = FNMS(KP414213562, TB, TA);
Chris@42 125 T2c = Ti - Tl;
Chris@42 126 Tm = Ti + Tl;
Chris@42 127 T1b = R1[WS(rs, 3)];
Chris@42 128 }
Chris@42 129 T12 = R1[WS(rs, 1)];
Chris@42 130 T13 = R1[WS(rs, 9)];
Chris@42 131 T15 = R1[WS(rs, 13)];
Chris@42 132 T1Y = T1b + T1a;
Chris@42 133 T1c = T1a - T1b;
Chris@42 134 T20 = T12 + T13;
Chris@42 135 T14 = T12 - T13;
Chris@42 136 T16 = R1[WS(rs, 5)];
Chris@42 137 }
Chris@42 138 T2j = T1X - T1Y;
Chris@42 139 T1Z = T1X + T1Y;
Chris@42 140 {
Chris@42 141 E TT, TU, TL, TM;
Chris@42 142 {
Chris@42 143 E TI, T21, T17, TJ, T18, T1d;
Chris@42 144 TI = R1[0];
Chris@42 145 T21 = T15 + T16;
Chris@42 146 T17 = T15 - T16;
Chris@42 147 TJ = R1[WS(rs, 8)];
Chris@42 148 TT = R1[WS(rs, 4)];
Chris@42 149 T2k = T21 - T20;
Chris@42 150 T22 = T20 + T21;
Chris@42 151 T18 = T14 + T17;
Chris@42 152 T1d = T17 - T14;
Chris@42 153 T1Q = TI + TJ;
Chris@42 154 TK = TI - TJ;
Chris@42 155 T1B = FNMS(KP707106781, T18, T11);
Chris@42 156 T19 = FMA(KP707106781, T18, T11);
Chris@42 157 T1C = FNMS(KP707106781, T1d, T1c);
Chris@42 158 T1e = FMA(KP707106781, T1d, T1c);
Chris@42 159 TU = R1[WS(rs, 12)];
Chris@42 160 }
Chris@42 161 TL = R1[WS(rs, 2)];
Chris@42 162 TM = R1[WS(rs, 10)];
Chris@42 163 TO = R1[WS(rs, 14)];
Chris@42 164 T1R = TT + TU;
Chris@42 165 TV = TT - TU;
Chris@42 166 T1T = TL + TM;
Chris@42 167 TN = TL - TM;
Chris@42 168 TP = R1[WS(rs, 6)];
Chris@42 169 }
Chris@42 170 T2g = T1Q - T1R;
Chris@42 171 T1S = T1Q + T1R;
Chris@42 172 }
Chris@42 173 }
Chris@42 174 }
Chris@42 175 {
Chris@42 176 E T1P, T25, T23, T2h, T1W, T1y, TS, T1z, TX, T27, T2a;
Chris@42 177 {
Chris@42 178 E Tf, Tu, T29, T28;
Chris@42 179 {
Chris@42 180 E T1U, TQ, T1V, TR, TW;
Chris@42 181 T1P = T7 - Te;
Chris@42 182 Tf = T7 + Te;
Chris@42 183 T1U = TO + TP;
Chris@42 184 TQ = TO - TP;
Chris@42 185 Tu = Tm + Tt;
Chris@42 186 T25 = Tt - Tm;
Chris@42 187 T23 = T1Z - T22;
Chris@42 188 T29 = T1Z + T22;
Chris@42 189 T2h = T1U - T1T;
Chris@42 190 T1V = T1T + T1U;
Chris@42 191 TR = TN + TQ;
Chris@42 192 TW = TN - TQ;
Chris@42 193 T27 = Tf + Tu;
Chris@42 194 T1W = T1S - T1V;
Chris@42 195 T28 = T1S + T1V;
Chris@42 196 T1y = FNMS(KP707106781, TR, TK);
Chris@42 197 TS = FMA(KP707106781, TR, TK);
Chris@42 198 T1z = FNMS(KP707106781, TW, TV);
Chris@42 199 TX = FMA(KP707106781, TW, TV);
Chris@42 200 T2a = T28 + T29;
Chris@42 201 }
Chris@42 202 Cr[WS(csr, 8)] = Tf - Tu;
Chris@42 203 Ci[WS(csi, 8)] = T29 - T28;
Chris@42 204 }
/* T27 sums all sixteen R0 reads and T2a all sixteen R1 reads, so Cr[0] is the sum of all 32 inputs and Cr[16] is (R0 sum) - (R1 sum). */
Chris@42 205 Cr[0] = T27 + T2a;
Chris@42 206 Cr[WS(csr, 16)] = T27 - T2a;
Chris@42 207 {
Chris@42 208 E T2s, T2i, T2v, T2f, T2r, T2p, T2l, T2t;
Chris@42 209 {
Chris@42 210 E T2o, T2e, T26, T24;
Chris@42 211 T2o = T2d - T2c;
Chris@42 212 T2e = T2c + T2d;
Chris@42 213 T2s = FNMS(KP414213562, T2g, T2h);
Chris@42 214 T2i = FMA(KP414213562, T2h, T2g);
Chris@42 215 T26 = T23 - T1W;
Chris@42 216 T24 = T1W + T23;
Chris@42 217 T2v = FNMS(KP707106781, T2e, T2b);
Chris@42 218 T2f = FMA(KP707106781, T2e, T2b);
Chris@42 219 T2r = FMA(KP707106781, T2o, T2n);
Chris@42 220 T2p = FNMS(KP707106781, T2o, T2n);
Chris@42 221 Ci[WS(csi, 4)] = FMA(KP707106781, T26, T25);
Chris@42 222 Ci[WS(csi, 12)] = FMS(KP707106781, T26, T25);
Chris@42 223 Cr[WS(csr, 4)] = FMA(KP707106781, T24, T1P);
Chris@42 224 Cr[WS(csr, 12)] = FNMS(KP707106781, T24, T1P);
Chris@42 225 T2l = FNMS(KP414213562, T2k, T2j);
Chris@42 226 T2t = FMA(KP414213562, T2j, T2k);
Chris@42 227 }
Chris@42 228 {
Chris@42 229 E T1v, T1G, TH, T1s, T1F, T1w, T1o, T1g, T1p, T1n;
Chris@42 230 {
Chris@42 231 E T1f, TY, T1t, T1u, T1j, T1m;
Chris@42 232 {
Chris@42 233 E Tz, TG, T1q, T1r;
Chris@42 234 T1v = FNMS(KP707106781, Ty, Tv);
Chris@42 235 Tz = FMA(KP707106781, Ty, Tv);
Chris@42 236 {
Chris@42 237 E T2q, T2m, T2w, T2u;
Chris@42 238 T2q = T2l - T2i;
Chris@42 239 T2m = T2i + T2l;
Chris@42 240 T2w = T2t - T2s;
Chris@42 241 T2u = T2s + T2t;
Chris@42 242 Ci[WS(csi, 10)] = FMA(KP923879532, T2q, T2p);
Chris@42 243 Ci[WS(csi, 6)] = FMS(KP923879532, T2q, T2p);
Chris@42 244 Cr[WS(csr, 2)] = FMA(KP923879532, T2m, T2f);
Chris@42 245 Cr[WS(csr, 14)] = FNMS(KP923879532, T2m, T2f);
Chris@42 246 Cr[WS(csr, 10)] = FNMS(KP923879532, T2w, T2v);
Chris@42 247 Cr[WS(csr, 6)] = FMA(KP923879532, T2w, T2v);
Chris@42 248 Ci[WS(csi, 2)] = FMA(KP923879532, T2u, T2r);
Chris@42 249 Ci[WS(csi, 14)] = FMS(KP923879532, T2u, T2r);
Chris@42 250 TG = TC + TF;
Chris@42 251 T1G = TF - TC;
Chris@42 252 }
Chris@42 253 T1f = FNMS(KP198912367, T1e, T19);
Chris@42 254 T1q = FMA(KP198912367, T19, T1e);
Chris@42 255 T1r = FMA(KP198912367, TS, TX);
Chris@42 256 TY = FNMS(KP198912367, TX, TS);
Chris@42 257 T1t = FNMS(KP923879532, TG, Tz);
Chris@42 258 TH = FMA(KP923879532, TG, Tz);
Chris@42 259 T1u = T1r + T1q;
Chris@42 260 T1s = T1q - T1r;
Chris@42 261 T1F = FMA(KP707106781, T1i, T1h);
Chris@42 262 T1j = FNMS(KP707106781, T1i, T1h);
Chris@42 263 T1m = T1k + T1l;
Chris@42 264 T1w = T1k - T1l;
Chris@42 265 }
Chris@42 266 Cr[WS(csr, 7)] = FMA(KP980785280, T1u, T1t);
Chris@42 267 T1o = T1f - TY;
Chris@42 268 T1g = TY + T1f;
Chris@42 269 T1p = FMA(KP923879532, T1m, T1j);
Chris@42 270 T1n = FNMS(KP923879532, T1m, T1j);
Chris@42 271 Cr[WS(csr, 9)] = FNMS(KP980785280, T1u, T1t);
Chris@42 272 }
Chris@42 273 Cr[WS(csr, 1)] = FMA(KP980785280, T1g, TH);
Chris@42 274 Cr[WS(csr, 15)] = FNMS(KP980785280, T1g, TH);
Chris@42 275 Ci[WS(csi, 1)] = FMS(KP980785280, T1s, T1p);
Chris@42 276 Ci[WS(csi, 15)] = FMA(KP980785280, T1s, T1p);
Chris@42 277 Ci[WS(csi, 9)] = FMS(KP980785280, T1o, T1n);
Chris@42 278 Ci[WS(csi, 7)] = FMA(KP980785280, T1o, T1n);
Chris@42 279 {
Chris@42 280 E T1A, T1D, T1N, T1O, T1K, T1L;
Chris@42 281 T1A = FMA(KP668178637, T1z, T1y);
Chris@42 282 T1K = FNMS(KP668178637, T1y, T1z);
Chris@42 283 T1L = FNMS(KP668178637, T1B, T1C);
Chris@42 284 T1D = FMA(KP668178637, T1C, T1B);
Chris@42 285 T1N = FNMS(KP923879532, T1w, T1v);
Chris@42 286 T1x = FMA(KP923879532, T1w, T1v);
Chris@42 287 T1O = T1K + T1L;
Chris@42 288 T1M = T1K - T1L;
Chris@42 289 Cr[WS(csr, 5)] = FNMS(KP831469612, T1O, T1N);
Chris@42 290 T1I = T1D - T1A;
Chris@42 291 T1E = T1A + T1D;
Chris@42 292 T1J = FMA(KP923879532, T1G, T1F);
Chris@42 293 T1H = FNMS(KP923879532, T1G, T1F);
Chris@42 294 Cr[WS(csr, 11)] = FMA(KP831469612, T1O, T1N);
Chris@42 295 }
Chris@42 296 }
Chris@42 297 }
Chris@42 298 }
Chris@42 299 }
/* Final six stores use the loop-scope temporaries declared at the top of the body. */
Chris@42 300 Ci[WS(csi, 3)] = FMA(KP831469612, T1M, T1J);
Chris@42 301 Cr[WS(csr, 3)] = FMA(KP831469612, T1E, T1x);
Chris@42 302 Ci[WS(csi, 13)] = FMS(KP831469612, T1M, T1J);
Chris@42 303 Cr[WS(csr, 13)] = FNMS(KP831469612, T1E, T1x);
Chris@42 304 Ci[WS(csi, 11)] = FMA(KP831469612, T1I, T1H);
Chris@42 305 Ci[WS(csi, 5)] = FMS(KP831469612, T1I, T1H);
Chris@42 306 }
Chris@42 307 }
Chris@42 308 }
Chris@42 309
/*
 * Descriptor handed to the planner alongside the HAVE_FMA kernel: size 32,
 * the name string "r2cf_32", a four-number record {88, 0, 68, 0}, and &GENUS.
 * The first three numbers equal the "88 additions, 0 multiplications,
 * 68 fused multiply/add" tally in the generated header comment of this
 * variant; the meaning of the fourth field and of GENUS is defined by the
 * kr2c_desc declaration elsewhere -- TODO confirm field order there.
 */
Chris@42 310 static const kr2c_desc desc = { 32, "r2cf_32", {88, 0, 68, 0}, &GENUS };
Chris@42 311
/*
 * Registration entry point for the HAVE_FMA build: passes the planner p, the
 * r2cf_32 kernel and its descriptor to X(kr2c_register).  X() is a project
 * macro applied to both identifiers (presumably a name-prefixing wrapper --
 * see the project headers).
 */
Chris@42 312 void X(codelet_r2cf_32) (planner *p) {
Chris@42 313 X(kr2c_register) (p, r2cf_32, &desc);
Chris@42 314 }
Chris@42 315
Chris@42 316 #else /* HAVE_FMA */
Chris@42 317
Chris@42 318 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cf_32 -include r2cf.h */
Chris@42 319
Chris@42 320 /*
Chris@42 321 * This function contains 156 FP additions, 42 FP multiplications,
Chris@42 322 * (or, 140 additions, 26 multiplications, 16 fused multiply/add),
Chris@42 323 * 54 stack variables, 7 constants, and 64 memory accesses
Chris@42 324 */
Chris@42 325 #include "r2cf.h"
Chris@42 326
/*
 * r2cf_32 (variant compiled when HAVE_FMA is not defined): machine-generated
 * size-32 kernel (genfft, "-n 32 -name r2cf_32"); do not hand-edit the
 * arithmetic.
 *
 * Per loop iteration it reads 32 values -- R0[WS(rs, k)] and R1[WS(rs, k)]
 * for k = 0..15 -- and writes 32 values -- Cr[WS(csr, k)] for k = 0..16 and
 * Ci[WS(csi, k)] for k = 1..15.  Ci[0] and Ci[WS(csi, 16)] are never written
 * by this function.
 * NOTE(review): presumably R0/R1 hold the even/odd-indexed real input samples
 * and Cr/Ci the real/imaginary parts of the output bins -- confirm against
 * the r2cf codelet documentation.
 *
 * Parameters:
 *   R0, R1       input pointers; advanced by ivs after every iteration
 *   Cr, Ci       output pointers; advanced by ovs after every iteration
 *   rs           stride passed to WS() when indexing R0 and R1
 *   csr, csi     strides passed to WS() when indexing Cr and Ci respectively
 *   v            iteration count; the loop body runs v times (not at all
 *                when v <= 0)
 *   ivs, ovs     per-iteration pointer increments for inputs / outputs
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from the
 * project headers and are not visible here (FMA/FNMS are presumably
 * multiply-add style macros -- TODO confirm exact sign conventions in the
 * header before reasoning about individual lines).
 */
Chris@42 327 static void r2cf_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 328 {
/*
 * Constants, identified by their numeric values: 0.5555... = sin(3*pi/16),
 * 0.8314... = cos(3*pi/16), 0.1950... = sin(pi/16), 0.9807... = cos(pi/16),
 * 0.3826... = sin(pi/8), 0.9238... = cos(pi/8), 0.7071... = sqrt(1/2).
 */
Chris@42 329 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 330 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 331 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 332 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 333 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 334 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 335 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 336 {
Chris@42 337 INT i;
/* One pass of the body per transform; the increment clause steps all four pointers. */
Chris@42 338 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
Chris@42 339 E T7, T2b, Tv, T1l, Te, T2o, Ty, T1k, Tt, T2d, TF, T1h, Tm, T2c, TC;
Chris@42 340 E T1i, T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z;
Chris@42 341 E TS, T1y;
/* Loads and first-stage sums/differences of R0[0], R0[4], R0[8], R0[12]. */
Chris@42 342 {
Chris@42 343 E T1, T2, T3, T4, T5, T6;
Chris@42 344 T1 = R0[0];
Chris@42 345 T2 = R0[WS(rs, 8)];
Chris@42 346 T3 = T1 + T2;
Chris@42 347 T4 = R0[WS(rs, 4)];
Chris@42 348 T5 = R0[WS(rs, 12)];
Chris@42 349 T6 = T4 + T5;
Chris@42 350 T7 = T3 + T6;
Chris@42 351 T2b = T3 - T6;
Chris@42 352 Tv = T1 - T2;
Chris@42 353 T1l = T4 - T5;
Chris@42 354 }
/* R0[2], R0[6], R0[10], R0[14]. */
Chris@42 355 {
Chris@42 356 E Ta, Tw, Td, Tx;
Chris@42 357 {
Chris@42 358 E T8, T9, Tb, Tc;
Chris@42 359 T8 = R0[WS(rs, 2)];
Chris@42 360 T9 = R0[WS(rs, 10)];
Chris@42 361 Ta = T8 + T9;
Chris@42 362 Tw = T8 - T9;
Chris@42 363 Tb = R0[WS(rs, 14)];
Chris@42 364 Tc = R0[WS(rs, 6)];
Chris@42 365 Td = Tb + Tc;
Chris@42 366 Tx = Tb - Tc;
Chris@42 367 }
Chris@42 368 Te = Ta + Td;
Chris@42 369 T2o = Td - Ta;
Chris@42 370 Ty = KP707106781 * (Tw + Tx);
Chris@42 371 T1k = KP707106781 * (Tx - Tw);
Chris@42 372 }
/* R0[3], R0[7], R0[11], R0[15]. */
Chris@42 373 {
Chris@42 374 E Tp, TD, Ts, TE;
Chris@42 375 {
Chris@42 376 E Tn, To, Tq, Tr;
Chris@42 377 Tn = R0[WS(rs, 15)];
Chris@42 378 To = R0[WS(rs, 7)];
Chris@42 379 Tp = Tn + To;
Chris@42 380 TD = Tn - To;
Chris@42 381 Tq = R0[WS(rs, 3)];
Chris@42 382 Tr = R0[WS(rs, 11)];
Chris@42 383 Ts = Tq + Tr;
Chris@42 384 TE = Tq - Tr;
Chris@42 385 }
Chris@42 386 Tt = Tp + Ts;
Chris@42 387 T2d = Tp - Ts;
Chris@42 388 TF = FMA(KP923879532, TD, KP382683432 * TE);
Chris@42 389 T1h = FNMS(KP923879532, TE, KP382683432 * TD);
Chris@42 390 }
/* R0[1], R0[5], R0[9], R0[13]. */
Chris@42 391 {
Chris@42 392 E Ti, TA, Tl, TB;
Chris@42 393 {
Chris@42 394 E Tg, Th, Tj, Tk;
Chris@42 395 Tg = R0[WS(rs, 1)];
Chris@42 396 Th = R0[WS(rs, 9)];
Chris@42 397 Ti = Tg + Th;
Chris@42 398 TA = Tg - Th;
Chris@42 399 Tj = R0[WS(rs, 5)];
Chris@42 400 Tk = R0[WS(rs, 13)];
Chris@42 401 Tl = Tj + Tk;
Chris@42 402 TB = Tj - Tk;
Chris@42 403 }
Chris@42 404 Tm = Ti + Tl;
Chris@42 405 T2c = Ti - Tl;
Chris@42 406 TC = FNMS(KP382683432, TB, KP923879532 * TA);
Chris@42 407 T1i = FMA(KP382683432, TA, KP923879532 * TB);
Chris@42 408 }
/* Odd-indexed R1 elements: R1[1], R1[3], ..., R1[15]. */
Chris@42 409 {
Chris@42 410 E T11, T1X, T1d, T1Y, T14, T20, T17, T21, T1a, T18;
Chris@42 411 {
Chris@42 412 E TZ, T10, T1b, T1c;
Chris@42 413 TZ = R1[WS(rs, 15)];
Chris@42 414 T10 = R1[WS(rs, 7)];
Chris@42 415 T11 = TZ - T10;
Chris@42 416 T1X = TZ + T10;
Chris@42 417 T1b = R1[WS(rs, 3)];
Chris@42 418 T1c = R1[WS(rs, 11)];
Chris@42 419 T1d = T1b - T1c;
Chris@42 420 T1Y = T1b + T1c;
Chris@42 421 }
Chris@42 422 {
Chris@42 423 E T12, T13, T15, T16;
Chris@42 424 T12 = R1[WS(rs, 1)];
Chris@42 425 T13 = R1[WS(rs, 9)];
Chris@42 426 T14 = T12 - T13;
Chris@42 427 T20 = T12 + T13;
Chris@42 428 T15 = R1[WS(rs, 13)];
Chris@42 429 T16 = R1[WS(rs, 5)];
Chris@42 430 T17 = T15 - T16;
Chris@42 431 T21 = T15 + T16;
Chris@42 432 }
Chris@42 433 T1Z = T1X + T1Y;
Chris@42 434 T22 = T20 + T21;
Chris@42 435 T2k = T21 - T20;
Chris@42 436 T2j = T1X - T1Y;
Chris@42 437 T1a = KP707106781 * (T17 - T14);
Chris@42 438 T1e = T1a - T1d;
Chris@42 439 T1C = T1d + T1a;
Chris@42 440 T18 = KP707106781 * (T14 + T17);
Chris@42 441 T19 = T11 + T18;
Chris@42 442 T1B = T11 - T18;
Chris@42 443 }
/* Even-indexed R1 elements: R1[0], R1[2], ..., R1[14]. */
Chris@42 444 {
Chris@42 445 E TK, T1Q, TW, T1R, TN, T1T, TQ, T1U, TT, TR;
Chris@42 446 {
Chris@42 447 E TI, TJ, TU, TV;
Chris@42 448 TI = R1[0];
Chris@42 449 TJ = R1[WS(rs, 8)];
Chris@42 450 TK = TI - TJ;
Chris@42 451 T1Q = TI + TJ;
Chris@42 452 TU = R1[WS(rs, 4)];
Chris@42 453 TV = R1[WS(rs, 12)];
Chris@42 454 TW = TU - TV;
Chris@42 455 T1R = TU + TV;
Chris@42 456 }
Chris@42 457 {
Chris@42 458 E TL, TM, TO, TP;
Chris@42 459 TL = R1[WS(rs, 2)];
Chris@42 460 TM = R1[WS(rs, 10)];
Chris@42 461 TN = TL - TM;
Chris@42 462 T1T = TL + TM;
Chris@42 463 TO = R1[WS(rs, 14)];
Chris@42 464 TP = R1[WS(rs, 6)];
Chris@42 465 TQ = TO - TP;
Chris@42 466 T1U = TO + TP;
Chris@42 467 }
Chris@42 468 T1S = T1Q + T1R;
Chris@42 469 T1V = T1T + T1U;
Chris@42 470 T2h = T1U - T1T;
Chris@42 471 T2g = T1Q - T1R;
Chris@42 472 TT = KP707106781 * (TQ - TN);
Chris@42 473 TX = TT - TW;
Chris@42 474 T1z = TW + TT;
Chris@42 475 TR = KP707106781 * (TN + TQ);
Chris@42 476 TS = TK + TR;
Chris@42 477 T1y = TK - TR;
Chris@42 478 }
/* Outputs 0, 8 and 16.  T27 sums all sixteen R0 reads and T2a all sixteen R1 reads. */
Chris@42 479 {
Chris@42 480 E Tf, Tu, T27, T28, T29, T2a;
Chris@42 481 Tf = T7 + Te;
Chris@42 482 Tu = Tm + Tt;
Chris@42 483 T27 = Tf + Tu;
Chris@42 484 T28 = T1S + T1V;
Chris@42 485 T29 = T1Z + T22;
Chris@42 486 T2a = T28 + T29;
Chris@42 487 Cr[WS(csr, 8)] = Tf - Tu;
Chris@42 488 Ci[WS(csi, 8)] = T29 - T28;
Chris@42 489 Cr[WS(csr, 16)] = T27 - T2a;
Chris@42 490 Cr[0] = T27 + T2a;
Chris@42 491 }
/* Outputs 4 and 12. */
Chris@42 492 {
Chris@42 493 E T1P, T25, T24, T26, T1W, T23;
Chris@42 494 T1P = T7 - Te;
Chris@42 495 T25 = Tt - Tm;
Chris@42 496 T1W = T1S - T1V;
Chris@42 497 T23 = T1Z - T22;
Chris@42 498 T24 = KP707106781 * (T1W + T23);
Chris@42 499 T26 = KP707106781 * (T23 - T1W);
Chris@42 500 Cr[WS(csr, 12)] = T1P - T24;
Chris@42 501 Ci[WS(csi, 12)] = T26 - T25;
Chris@42 502 Cr[WS(csr, 4)] = T1P + T24;
Chris@42 503 Ci[WS(csi, 4)] = T25 + T26;
Chris@42 504 }
/* Outputs 2, 6, 10 and 14. */
Chris@42 505 {
Chris@42 506 E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2n;
Chris@42 507 T2e = KP707106781 * (T2c + T2d);
Chris@42 508 T2f = T2b + T2e;
Chris@42 509 T2v = T2b - T2e;
Chris@42 510 T2n = KP707106781 * (T2d - T2c);
Chris@42 511 T2p = T2n - T2o;
Chris@42 512 T2r = T2o + T2n;
Chris@42 513 {
Chris@42 514 E T2i, T2l, T2s, T2t;
Chris@42 515 T2i = FMA(KP923879532, T2g, KP382683432 * T2h);
Chris@42 516 T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
Chris@42 517 T2m = T2i + T2l;
Chris@42 518 T2q = T2l - T2i;
Chris@42 519 T2s = FNMS(KP382683432, T2g, KP923879532 * T2h);
Chris@42 520 T2t = FMA(KP382683432, T2j, KP923879532 * T2k);
Chris@42 521 T2u = T2s + T2t;
Chris@42 522 T2w = T2t - T2s;
Chris@42 523 }
Chris@42 524 Cr[WS(csr, 14)] = T2f - T2m;
Chris@42 525 Ci[WS(csi, 14)] = T2u - T2r;
Chris@42 526 Cr[WS(csr, 2)] = T2f + T2m;
Chris@42 527 Ci[WS(csi, 2)] = T2r + T2u;
Chris@42 528 Ci[WS(csi, 6)] = T2p + T2q;
Chris@42 529 Cr[WS(csr, 6)] = T2v + T2w;
Chris@42 530 Ci[WS(csi, 10)] = T2q - T2p;
Chris@42 531 Cr[WS(csr, 10)] = T2v - T2w;
Chris@42 532 }
/* Outputs 1, 7, 9 and 15. */
Chris@42 533 {
Chris@42 534 E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p;
Chris@42 535 {
Chris@42 536 E Tz, TG, T1q, T1r;
Chris@42 537 Tz = Tv + Ty;
Chris@42 538 TG = TC + TF;
Chris@42 539 TH = Tz + TG;
Chris@42 540 T1t = Tz - TG;
Chris@42 541 T1q = FNMS(KP195090322, TS, KP980785280 * TX);
Chris@42 542 T1r = FMA(KP195090322, T19, KP980785280 * T1e);
Chris@42 543 T1s = T1q + T1r;
Chris@42 544 T1u = T1r - T1q;
Chris@42 545 }
Chris@42 546 {
Chris@42 547 E TY, T1f, T1j, T1m;
Chris@42 548 TY = FMA(KP980785280, TS, KP195090322 * TX);
Chris@42 549 T1f = FNMS(KP195090322, T1e, KP980785280 * T19);
Chris@42 550 T1g = TY + T1f;
Chris@42 551 T1o = T1f - TY;
Chris@42 552 T1j = T1h - T1i;
Chris@42 553 T1m = T1k - T1l;
Chris@42 554 T1n = T1j - T1m;
Chris@42 555 T1p = T1m + T1j;
Chris@42 556 }
Chris@42 557 Cr[WS(csr, 15)] = TH - T1g;
Chris@42 558 Ci[WS(csi, 15)] = T1s - T1p;
Chris@42 559 Cr[WS(csr, 1)] = TH + T1g;
Chris@42 560 Ci[WS(csi, 1)] = T1p + T1s;
Chris@42 561 Ci[WS(csi, 7)] = T1n + T1o;
Chris@42 562 Cr[WS(csr, 7)] = T1t + T1u;
Chris@42 563 Ci[WS(csi, 9)] = T1o - T1n;
Chris@42 564 Cr[WS(csr, 9)] = T1t - T1u;
Chris@42 565 }
/* Outputs 3, 5, 11 and 13. */
Chris@42 566 {
Chris@42 567 E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J;
Chris@42 568 {
Chris@42 569 E T1v, T1w, T1K, T1L;
Chris@42 570 T1v = Tv - Ty;
Chris@42 571 T1w = T1i + T1h;
Chris@42 572 T1x = T1v + T1w;
Chris@42 573 T1N = T1v - T1w;
Chris@42 574 T1K = FNMS(KP555570233, T1y, KP831469612 * T1z);
Chris@42 575 T1L = FMA(KP555570233, T1B, KP831469612 * T1C);
Chris@42 576 T1M = T1K + T1L;
Chris@42 577 T1O = T1L - T1K;
Chris@42 578 }
Chris@42 579 {
Chris@42 580 E T1A, T1D, T1F, T1G;
Chris@42 581 T1A = FMA(KP831469612, T1y, KP555570233 * T1z);
Chris@42 582 T1D = FNMS(KP555570233, T1C, KP831469612 * T1B);
Chris@42 583 T1E = T1A + T1D;
Chris@42 584 T1I = T1D - T1A;
Chris@42 585 T1F = TF - TC;
Chris@42 586 T1G = T1l + T1k;
Chris@42 587 T1H = T1F - T1G;
Chris@42 588 T1J = T1G + T1F;
Chris@42 589 }
Chris@42 590 Cr[WS(csr, 13)] = T1x - T1E;
Chris@42 591 Ci[WS(csi, 13)] = T1M - T1J;
Chris@42 592 Cr[WS(csr, 3)] = T1x + T1E;
Chris@42 593 Ci[WS(csi, 3)] = T1J + T1M;
Chris@42 594 Ci[WS(csi, 5)] = T1H + T1I;
Chris@42 595 Cr[WS(csr, 5)] = T1N + T1O;
Chris@42 596 Ci[WS(csi, 11)] = T1I - T1H;
Chris@42 597 Cr[WS(csr, 11)] = T1N - T1O;
Chris@42 598 }
Chris@42 599 }
Chris@42 600 }
Chris@42 601 }
Chris@42 602
/*
 * Descriptor handed to the planner alongside the non-FMA kernel: size 32,
 * the name string "r2cf_32", a four-number record {140, 26, 16, 0}, and
 * &GENUS.  The first three numbers equal the "140 additions,
 * 26 multiplications, 16 fused multiply/add" tally in the generated header
 * comment of this variant; the meaning of the fourth field and of GENUS is
 * defined by the kr2c_desc declaration elsewhere -- TODO confirm field order
 * there.
 */
Chris@42 603 static const kr2c_desc desc = { 32, "r2cf_32", {140, 26, 16, 0}, &GENUS };
Chris@42 604
/*
 * Registration entry point for the non-FMA build: passes the planner p, the
 * r2cf_32 kernel and its descriptor to X(kr2c_register).  X() is a project
 * macro applied to both identifiers (presumably a name-prefixing wrapper --
 * see the project headers).
 */
Chris@42 605 void X(codelet_r2cf_32) (planner *p) {
Chris@42 606 X(kr2c_register) (p, r2cf_32, &desc);
Chris@42 607 }
Chris@42 608
Chris@42 609 #endif /* HAVE_FMA */