annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:27 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cf_32 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 156 FP additions, 68 FP multiplications,
Chris@82 32 * (or, 88 additions, 0 multiplications, 68 fused multiply/add),
Chris@82 33 * 54 stack variables, 7 constants, and 64 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
Chris@82 37 static void r2cf_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 { /* Generated kernel, FMA variant. Each pass reads R0 and R1 at WS(rs, 0..15) and stores Cr[0], Cr[WS(csr, 1..16)] and Ci[WS(csi, 1..15)]; no store to Ci index 0 or 16 appears here. Presumably a size-32 real-input forward DFT (generator flags: -n 32, name r2cf) -- confirm against the codelet headers. */
Chris@82 39 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
Chris@82 40 DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* numerically tan(3*pi/16) */
Chris@82 41 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
Chris@82 42 DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* numerically tan(pi/16) */
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
Chris@82 44 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
Chris@82 45 DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* numerically sqrt(2)-1 = tan(pi/8) */
Chris@82 46 {
Chris@82 47 INT i; /* counts down from v; after each pass R0/R1 advance by ivs and Cr/Ci by ovs */
Chris@82 48 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
Chris@82 49 E T7, T2b, Tv, T1h, Te, T2n, Ty, T1i, Tt, T2d, TF, T1l, Tm, T2c, TC; /* intermediates shared between the input stages and the output stages below */
Chris@82 50 E T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z;
Chris@82 51 E TS, T1y;
Chris@82 52 { /* input stage: R0 elements 0, 8, 4, 12 (sums and differences only) */
Chris@82 53 E T1, T2, T3, T4, T5, T6;
Chris@82 54 T1 = R0[0];
Chris@82 55 T2 = R0[WS(rs, 8)];
Chris@82 56 T3 = T1 + T2;
Chris@82 57 T4 = R0[WS(rs, 4)];
Chris@82 58 T5 = R0[WS(rs, 12)];
Chris@82 59 T6 = T4 + T5;
Chris@82 60 T7 = T3 + T6;
Chris@82 61 T2b = T3 - T6;
Chris@82 62 Tv = T1 - T2;
Chris@82 63 T1h = T4 - T5;
Chris@82 64 }
Chris@82 65 { /* input stage: R0 elements 2, 10, 14, 6 */
Chris@82 66 E Ta, Tw, Td, Tx;
Chris@82 67 {
Chris@82 68 E T8, T9, Tb, Tc;
Chris@82 69 T8 = R0[WS(rs, 2)];
Chris@82 70 T9 = R0[WS(rs, 10)];
Chris@82 71 Ta = T8 + T9;
Chris@82 72 Tw = T8 - T9;
Chris@82 73 Tb = R0[WS(rs, 14)];
Chris@82 74 Tc = R0[WS(rs, 6)];
Chris@82 75 Td = Tb + Tc;
Chris@82 76 Tx = Tb - Tc;
Chris@82 77 }
Chris@82 78 Te = Ta + Td;
Chris@82 79 T2n = Td - Ta;
Chris@82 80 Ty = Tw + Tx; /* left unscaled here; KP707106781 is applied where Ty is consumed */
Chris@82 81 T1i = Tx - Tw;
Chris@82 82 }
Chris@82 83 { /* input stage: R0 elements 15, 7, 3, 11 */
Chris@82 84 E Tp, TD, Ts, TE;
Chris@82 85 {
Chris@82 86 E Tn, To, Tq, Tr;
Chris@82 87 Tn = R0[WS(rs, 15)];
Chris@82 88 To = R0[WS(rs, 7)];
Chris@82 89 Tp = Tn + To;
Chris@82 90 TD = Tn - To;
Chris@82 91 Tq = R0[WS(rs, 3)];
Chris@82 92 Tr = R0[WS(rs, 11)];
Chris@82 93 Ts = Tq + Tr;
Chris@82 94 TE = Tq - Tr;
Chris@82 95 }
Chris@82 96 Tt = Tp + Ts;
Chris@82 97 T2d = Tp - Ts;
Chris@82 98 TF = FMA(KP414213562, TE, TD); /* NOTE(review): FMA/FNMS/FMS are macros defined outside this file; presumably a*b+c, c-a*b and a*b-c respectively -- confirm */
Chris@82 99 T1l = FNMS(KP414213562, TD, TE);
Chris@82 100 }
Chris@82 101 { /* input stage: R0 elements 1, 9, 5, 13 */
Chris@82 102 E Ti, TA, Tl, TB;
Chris@82 103 {
Chris@82 104 E Tg, Th, Tj, Tk;
Chris@82 105 Tg = R0[WS(rs, 1)];
Chris@82 106 Th = R0[WS(rs, 9)];
Chris@82 107 Ti = Tg + Th;
Chris@82 108 TA = Tg - Th;
Chris@82 109 Tj = R0[WS(rs, 5)];
Chris@82 110 Tk = R0[WS(rs, 13)];
Chris@82 111 Tl = Tj + Tk;
Chris@82 112 TB = Tj - Tk;
Chris@82 113 }
Chris@82 114 Tm = Ti + Tl;
Chris@82 115 T2c = Ti - Tl;
Chris@82 116 TC = FNMS(KP414213562, TB, TA);
Chris@82 117 T1k = FMA(KP414213562, TA, TB);
Chris@82 118 }
Chris@82 119 { /* input stage: R1 elements 15, 7, 11, 3, 1, 9, 13, 5 */
Chris@82 120 E T11, T1X, T1c, T1Y, T14, T20, T17, T21, T1d, T18;
Chris@82 121 {
Chris@82 122 E TZ, T10, T1a, T1b;
Chris@82 123 TZ = R1[WS(rs, 15)];
Chris@82 124 T10 = R1[WS(rs, 7)];
Chris@82 125 T11 = TZ - T10;
Chris@82 126 T1X = TZ + T10;
Chris@82 127 T1a = R1[WS(rs, 11)];
Chris@82 128 T1b = R1[WS(rs, 3)];
Chris@82 129 T1c = T1a - T1b;
Chris@82 130 T1Y = T1b + T1a;
Chris@82 131 }
Chris@82 132 {
Chris@82 133 E T12, T13, T15, T16;
Chris@82 134 T12 = R1[WS(rs, 1)];
Chris@82 135 T13 = R1[WS(rs, 9)];
Chris@82 136 T14 = T12 - T13;
Chris@82 137 T20 = T12 + T13;
Chris@82 138 T15 = R1[WS(rs, 13)];
Chris@82 139 T16 = R1[WS(rs, 5)];
Chris@82 140 T17 = T15 - T16;
Chris@82 141 T21 = T15 + T16;
Chris@82 142 }
Chris@82 143 T1Z = T1X + T1Y;
Chris@82 144 T22 = T20 + T21;
Chris@82 145 T2k = T21 - T20;
Chris@82 146 T2j = T1X - T1Y;
Chris@82 147 T1d = T17 - T14;
Chris@82 148 T1e = FMA(KP707106781, T1d, T1c);
Chris@82 149 T1C = FNMS(KP707106781, T1d, T1c);
Chris@82 150 T18 = T14 + T17;
Chris@82 151 T19 = FMA(KP707106781, T18, T11);
Chris@82 152 T1B = FNMS(KP707106781, T18, T11);
Chris@82 153 }
Chris@82 154 { /* input stage: R1 elements 0, 8, 4, 12, 2, 10, 14, 6 */
Chris@82 155 E TK, T1Q, TV, T1R, TN, T1T, TQ, T1U, TW, TR;
Chris@82 156 {
Chris@82 157 E TI, TJ, TT, TU;
Chris@82 158 TI = R1[0];
Chris@82 159 TJ = R1[WS(rs, 8)];
Chris@82 160 TK = TI - TJ;
Chris@82 161 T1Q = TI + TJ;
Chris@82 162 TT = R1[WS(rs, 4)];
Chris@82 163 TU = R1[WS(rs, 12)];
Chris@82 164 TV = TT - TU;
Chris@82 165 T1R = TT + TU;
Chris@82 166 }
Chris@82 167 {
Chris@82 168 E TL, TM, TO, TP;
Chris@82 169 TL = R1[WS(rs, 2)];
Chris@82 170 TM = R1[WS(rs, 10)];
Chris@82 171 TN = TL - TM;
Chris@82 172 T1T = TL + TM;
Chris@82 173 TO = R1[WS(rs, 14)];
Chris@82 174 TP = R1[WS(rs, 6)];
Chris@82 175 TQ = TO - TP;
Chris@82 176 T1U = TO + TP;
Chris@82 177 }
Chris@82 178 T1S = T1Q + T1R;
Chris@82 179 T1V = T1T + T1U;
Chris@82 180 T2h = T1U - T1T;
Chris@82 181 T2g = T1Q - T1R;
Chris@82 182 TW = TN - TQ;
Chris@82 183 TX = FMA(KP707106781, TW, TV);
Chris@82 184 T1z = FNMS(KP707106781, TW, TV);
Chris@82 185 TR = TN + TQ;
Chris@82 186 TS = FMA(KP707106781, TR, TK);
Chris@82 187 T1y = FNMS(KP707106781, TR, TK);
Chris@82 188 }
Chris@82 189 { /* output stage: Cr[0], Cr[8], Cr[16], Ci[8] -- plain sums and differences, no constants */
Chris@82 190 E Tf, Tu, T27, T28, T29, T2a;
Chris@82 191 Tf = T7 + Te;
Chris@82 192 Tu = Tm + Tt;
Chris@82 193 T27 = Tf + Tu; /* sum of all sixteen R0 elements */
Chris@82 194 T28 = T1S + T1V;
Chris@82 195 T29 = T1Z + T22;
Chris@82 196 T2a = T28 + T29; /* sum of all sixteen R1 elements */
Chris@82 197 Cr[WS(csr, 8)] = Tf - Tu;
Chris@82 198 Ci[WS(csi, 8)] = T29 - T28;
Chris@82 199 Cr[WS(csr, 16)] = T27 - T2a;
Chris@82 200 Cr[0] = T27 + T2a;
Chris@82 201 }
Chris@82 202 { /* output stage: indices 4 and 12 (Cr and Ci), using KP707106781 only */
Chris@82 203 E T1P, T25, T24, T26, T1W, T23;
Chris@82 204 T1P = T7 - Te;
Chris@82 205 T25 = Tt - Tm;
Chris@82 206 T1W = T1S - T1V;
Chris@82 207 T23 = T1Z - T22;
Chris@82 208 T24 = T1W + T23;
Chris@82 209 T26 = T23 - T1W;
Chris@82 210 Cr[WS(csr, 12)] = FNMS(KP707106781, T24, T1P);
Chris@82 211 Ci[WS(csi, 12)] = FMS(KP707106781, T26, T25);
Chris@82 212 Cr[WS(csr, 4)] = FMA(KP707106781, T24, T1P);
Chris@82 213 Ci[WS(csi, 4)] = FMA(KP707106781, T26, T25);
Chris@82 214 }
Chris@82 215 { /* output stage: indices 2, 6, 10, 14 (Cr and Ci) */
Chris@82 216 E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2o;
Chris@82 217 T2e = T2c + T2d;
Chris@82 218 T2f = FMA(KP707106781, T2e, T2b);
Chris@82 219 T2v = FNMS(KP707106781, T2e, T2b);
Chris@82 220 T2o = T2d - T2c;
Chris@82 221 T2p = FNMS(KP707106781, T2o, T2n);
Chris@82 222 T2r = FMA(KP707106781, T2o, T2n);
Chris@82 223 {
Chris@82 224 E T2i, T2l, T2s, T2t;
Chris@82 225 T2i = FMA(KP414213562, T2h, T2g);
Chris@82 226 T2l = FNMS(KP414213562, T2k, T2j);
Chris@82 227 T2m = T2i + T2l;
Chris@82 228 T2q = T2l - T2i;
Chris@82 229 T2s = FNMS(KP414213562, T2g, T2h);
Chris@82 230 T2t = FMA(KP414213562, T2j, T2k);
Chris@82 231 T2u = T2s + T2t;
Chris@82 232 T2w = T2t - T2s;
Chris@82 233 }
Chris@82 234 Cr[WS(csr, 14)] = FNMS(KP923879532, T2m, T2f);
Chris@82 235 Ci[WS(csi, 14)] = FMS(KP923879532, T2u, T2r);
Chris@82 236 Cr[WS(csr, 2)] = FMA(KP923879532, T2m, T2f);
Chris@82 237 Ci[WS(csi, 2)] = FMA(KP923879532, T2u, T2r);
Chris@82 238 Ci[WS(csi, 6)] = FMS(KP923879532, T2q, T2p);
Chris@82 239 Cr[WS(csr, 6)] = FMA(KP923879532, T2w, T2v);
Chris@82 240 Ci[WS(csi, 10)] = FMA(KP923879532, T2q, T2p);
Chris@82 241 Cr[WS(csr, 10)] = FNMS(KP923879532, T2w, T2v);
Chris@82 242 }
Chris@82 243 { /* output stage: indices 1, 7, 9, 15 (Cr and Ci) */
Chris@82 244 E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p;
Chris@82 245 {
Chris@82 246 E Tz, TG, T1q, T1r;
Chris@82 247 Tz = FMA(KP707106781, Ty, Tv);
Chris@82 248 TG = TC + TF;
Chris@82 249 TH = FMA(KP923879532, TG, Tz);
Chris@82 250 T1t = FNMS(KP923879532, TG, Tz);
Chris@82 251 T1q = FMA(KP198912367, T19, T1e);
Chris@82 252 T1r = FMA(KP198912367, TS, TX);
Chris@82 253 T1s = T1q - T1r;
Chris@82 254 T1u = T1r + T1q;
Chris@82 255 }
Chris@82 256 {
Chris@82 257 E TY, T1f, T1j, T1m;
Chris@82 258 TY = FNMS(KP198912367, TX, TS);
Chris@82 259 T1f = FNMS(KP198912367, T1e, T19);
Chris@82 260 T1g = TY + T1f;
Chris@82 261 T1o = T1f - TY;
Chris@82 262 T1j = FNMS(KP707106781, T1i, T1h);
Chris@82 263 T1m = T1k + T1l;
Chris@82 264 T1n = FNMS(KP923879532, T1m, T1j);
Chris@82 265 T1p = FMA(KP923879532, T1m, T1j);
Chris@82 266 }
Chris@82 267 Cr[WS(csr, 15)] = FNMS(KP980785280, T1g, TH);
Chris@82 268 Ci[WS(csi, 15)] = FMA(KP980785280, T1s, T1p);
Chris@82 269 Cr[WS(csr, 1)] = FMA(KP980785280, T1g, TH);
Chris@82 270 Ci[WS(csi, 1)] = FMS(KP980785280, T1s, T1p);
Chris@82 271 Ci[WS(csi, 7)] = FMA(KP980785280, T1o, T1n);
Chris@82 272 Cr[WS(csr, 7)] = FMA(KP980785280, T1u, T1t);
Chris@82 273 Ci[WS(csi, 9)] = FMS(KP980785280, T1o, T1n);
Chris@82 274 Cr[WS(csr, 9)] = FNMS(KP980785280, T1u, T1t);
Chris@82 275 }
Chris@82 276 { /* output stage: indices 3, 5, 11, 13 (Cr and Ci) */
Chris@82 277 E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J;
Chris@82 278 {
Chris@82 279 E T1v, T1w, T1K, T1L;
Chris@82 280 T1v = FNMS(KP707106781, Ty, Tv);
Chris@82 281 T1w = T1k - T1l;
Chris@82 282 T1x = FMA(KP923879532, T1w, T1v);
Chris@82 283 T1N = FNMS(KP923879532, T1w, T1v);
Chris@82 284 T1K = FNMS(KP668178637, T1y, T1z);
Chris@82 285 T1L = FNMS(KP668178637, T1B, T1C);
Chris@82 286 T1M = T1K - T1L;
Chris@82 287 T1O = T1K + T1L;
Chris@82 288 }
Chris@82 289 {
Chris@82 290 E T1A, T1D, T1F, T1G;
Chris@82 291 T1A = FMA(KP668178637, T1z, T1y);
Chris@82 292 T1D = FMA(KP668178637, T1C, T1B);
Chris@82 293 T1E = T1A + T1D;
Chris@82 294 T1I = T1D - T1A;
Chris@82 295 T1F = FMA(KP707106781, T1i, T1h);
Chris@82 296 T1G = TF - TC;
Chris@82 297 T1H = FNMS(KP923879532, T1G, T1F);
Chris@82 298 T1J = FMA(KP923879532, T1G, T1F);
Chris@82 299 }
Chris@82 300 Cr[WS(csr, 13)] = FNMS(KP831469612, T1E, T1x);
Chris@82 301 Ci[WS(csi, 13)] = FMS(KP831469612, T1M, T1J);
Chris@82 302 Cr[WS(csr, 3)] = FMA(KP831469612, T1E, T1x);
Chris@82 303 Ci[WS(csi, 3)] = FMA(KP831469612, T1M, T1J);
Chris@82 304 Ci[WS(csi, 5)] = FMS(KP831469612, T1I, T1H);
Chris@82 305 Cr[WS(csr, 5)] = FNMS(KP831469612, T1O, T1N);
Chris@82 306 Ci[WS(csi, 11)] = FMA(KP831469612, T1I, T1H);
Chris@82 307 Cr[WS(csr, 11)] = FMA(KP831469612, T1O, T1N);
Chris@82 308 }
Chris@82 309 }
Chris@82 310 }
Chris@82 311 }
Chris@82 312
Chris@82 313 static const kr2c_desc desc = { 32, "r2cf_32", {88, 0, 68, 0}, &GENUS }; /* descriptor passed at registration; 88/0/68 equal the addition/multiplication/fused-multiply-add counts quoted in this branch's header comment. Field meanings are set by kr2c_desc, declared outside this file. */
Chris@82 314
Chris@82 315 void X(codelet_r2cf_32) (planner *p) { /* registration entry point for the FMA build of this codelet */
Chris@82 316 X(kr2c_register) (p, r2cf_32, &desc); /* hands the kernel function and its descriptor to planner p */
Chris@82 317 }
Chris@82 318
Chris@82 319 #else
Chris@82 320
Chris@82 321 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cf_32 -include rdft/scalar/r2cf.h */
Chris@82 322
Chris@82 323 /*
Chris@82 324 * This function contains 156 FP additions, 42 FP multiplications,
Chris@82 325 * (or, 140 additions, 26 multiplications, 16 fused multiply/add),
Chris@82 326 * 54 stack variables, 7 constants, and 64 memory accesses
Chris@82 327 */
Chris@82 328 #include "rdft/scalar/r2cf.h"
Chris@82 329
Chris@82 330 static void r2cf_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 331 { /* Generated kernel, non-FMA variant (generator run without -fma). Each pass reads R0 and R1 at WS(rs, 0..15) and stores Cr[0], Cr[WS(csr, 1..16)] and Ci[WS(csi, 1..15)]; no store to Ci index 0 or 16 appears here. Presumably a size-32 real-input forward DFT (generator flags: -n 32, name r2cf) -- confirm against the codelet headers. */
Chris@82 332 DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* numerically sin(3*pi/16) */
Chris@82 333 DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
Chris@82 334 DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* numerically sin(pi/16) */
Chris@82 335 DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
Chris@82 336 DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
Chris@82 337 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
Chris@82 338 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
Chris@82 339 {
Chris@82 340 INT i; /* counts down from v; after each pass R0/R1 advance by ivs and Cr/Ci by ovs */
Chris@82 341 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
Chris@82 342 E T7, T2b, Tv, T1l, Te, T2o, Ty, T1k, Tt, T2d, TF, T1h, Tm, T2c, TC; /* intermediates shared between the input stages and the output stages below */
Chris@82 343 E T1i, T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z;
Chris@82 344 E TS, T1y;
Chris@82 345 { /* input stage: R0 elements 0, 8, 4, 12 (sums and differences only) */
Chris@82 346 E T1, T2, T3, T4, T5, T6;
Chris@82 347 T1 = R0[0];
Chris@82 348 T2 = R0[WS(rs, 8)];
Chris@82 349 T3 = T1 + T2;
Chris@82 350 T4 = R0[WS(rs, 4)];
Chris@82 351 T5 = R0[WS(rs, 12)];
Chris@82 352 T6 = T4 + T5;
Chris@82 353 T7 = T3 + T6;
Chris@82 354 T2b = T3 - T6;
Chris@82 355 Tv = T1 - T2;
Chris@82 356 T1l = T4 - T5;
Chris@82 357 }
Chris@82 358 { /* input stage: R0 elements 2, 10, 14, 6 */
Chris@82 359 E Ta, Tw, Td, Tx;
Chris@82 360 {
Chris@82 361 E T8, T9, Tb, Tc;
Chris@82 362 T8 = R0[WS(rs, 2)];
Chris@82 363 T9 = R0[WS(rs, 10)];
Chris@82 364 Ta = T8 + T9;
Chris@82 365 Tw = T8 - T9;
Chris@82 366 Tb = R0[WS(rs, 14)];
Chris@82 367 Tc = R0[WS(rs, 6)];
Chris@82 368 Td = Tb + Tc;
Chris@82 369 Tx = Tb - Tc;
Chris@82 370 }
Chris@82 371 Te = Ta + Td;
Chris@82 372 T2o = Td - Ta;
Chris@82 373 Ty = KP707106781 * (Tw + Tx); /* scaled immediately in this variant */
Chris@82 374 T1k = KP707106781 * (Tx - Tw);
Chris@82 375 }
Chris@82 376 { /* input stage: R0 elements 15, 7, 3, 11 */
Chris@82 377 E Tp, TD, Ts, TE;
Chris@82 378 {
Chris@82 379 E Tn, To, Tq, Tr;
Chris@82 380 Tn = R0[WS(rs, 15)];
Chris@82 381 To = R0[WS(rs, 7)];
Chris@82 382 Tp = Tn + To;
Chris@82 383 TD = Tn - To;
Chris@82 384 Tq = R0[WS(rs, 3)];
Chris@82 385 Tr = R0[WS(rs, 11)];
Chris@82 386 Ts = Tq + Tr;
Chris@82 387 TE = Tq - Tr;
Chris@82 388 }
Chris@82 389 Tt = Tp + Ts;
Chris@82 390 T2d = Tp - Ts;
Chris@82 391 TF = FMA(KP923879532, TD, KP382683432 * TE); /* NOTE(review): FMA/FNMS are macros defined outside this file; presumably a*b+c and c-a*b respectively -- confirm */
Chris@82 392 T1h = FNMS(KP923879532, TE, KP382683432 * TD);
Chris@82 393 }
Chris@82 394 { /* input stage: R0 elements 1, 9, 5, 13 */
Chris@82 395 E Ti, TA, Tl, TB;
Chris@82 396 {
Chris@82 397 E Tg, Th, Tj, Tk;
Chris@82 398 Tg = R0[WS(rs, 1)];
Chris@82 399 Th = R0[WS(rs, 9)];
Chris@82 400 Ti = Tg + Th;
Chris@82 401 TA = Tg - Th;
Chris@82 402 Tj = R0[WS(rs, 5)];
Chris@82 403 Tk = R0[WS(rs, 13)];
Chris@82 404 Tl = Tj + Tk;
Chris@82 405 TB = Tj - Tk;
Chris@82 406 }
Chris@82 407 Tm = Ti + Tl;
Chris@82 408 T2c = Ti - Tl;
Chris@82 409 TC = FNMS(KP382683432, TB, KP923879532 * TA);
Chris@82 410 T1i = FMA(KP382683432, TA, KP923879532 * TB);
Chris@82 411 }
Chris@82 412 { /* input stage: R1 elements 15, 7, 3, 11, 1, 9, 13, 5 */
Chris@82 413 E T11, T1X, T1d, T1Y, T14, T20, T17, T21, T1a, T18;
Chris@82 414 {
Chris@82 415 E TZ, T10, T1b, T1c;
Chris@82 416 TZ = R1[WS(rs, 15)];
Chris@82 417 T10 = R1[WS(rs, 7)];
Chris@82 418 T11 = TZ - T10;
Chris@82 419 T1X = TZ + T10;
Chris@82 420 T1b = R1[WS(rs, 3)];
Chris@82 421 T1c = R1[WS(rs, 11)];
Chris@82 422 T1d = T1b - T1c;
Chris@82 423 T1Y = T1b + T1c;
Chris@82 424 }
Chris@82 425 {
Chris@82 426 E T12, T13, T15, T16;
Chris@82 427 T12 = R1[WS(rs, 1)];
Chris@82 428 T13 = R1[WS(rs, 9)];
Chris@82 429 T14 = T12 - T13;
Chris@82 430 T20 = T12 + T13;
Chris@82 431 T15 = R1[WS(rs, 13)];
Chris@82 432 T16 = R1[WS(rs, 5)];
Chris@82 433 T17 = T15 - T16;
Chris@82 434 T21 = T15 + T16;
Chris@82 435 }
Chris@82 436 T1Z = T1X + T1Y;
Chris@82 437 T22 = T20 + T21;
Chris@82 438 T2k = T21 - T20;
Chris@82 439 T2j = T1X - T1Y;
Chris@82 440 T1a = KP707106781 * (T17 - T14);
Chris@82 441 T1e = T1a - T1d;
Chris@82 442 T1C = T1d + T1a;
Chris@82 443 T18 = KP707106781 * (T14 + T17);
Chris@82 444 T19 = T11 + T18;
Chris@82 445 T1B = T11 - T18;
Chris@82 446 }
Chris@82 447 { /* input stage: R1 elements 0, 8, 4, 12, 2, 10, 14, 6 */
Chris@82 448 E TK, T1Q, TW, T1R, TN, T1T, TQ, T1U, TT, TR;
Chris@82 449 {
Chris@82 450 E TI, TJ, TU, TV;
Chris@82 451 TI = R1[0];
Chris@82 452 TJ = R1[WS(rs, 8)];
Chris@82 453 TK = TI - TJ;
Chris@82 454 T1Q = TI + TJ;
Chris@82 455 TU = R1[WS(rs, 4)];
Chris@82 456 TV = R1[WS(rs, 12)];
Chris@82 457 TW = TU - TV;
Chris@82 458 T1R = TU + TV;
Chris@82 459 }
Chris@82 460 {
Chris@82 461 E TL, TM, TO, TP;
Chris@82 462 TL = R1[WS(rs, 2)];
Chris@82 463 TM = R1[WS(rs, 10)];
Chris@82 464 TN = TL - TM;
Chris@82 465 T1T = TL + TM;
Chris@82 466 TO = R1[WS(rs, 14)];
Chris@82 467 TP = R1[WS(rs, 6)];
Chris@82 468 TQ = TO - TP;
Chris@82 469 T1U = TO + TP;
Chris@82 470 }
Chris@82 471 T1S = T1Q + T1R;
Chris@82 472 T1V = T1T + T1U;
Chris@82 473 T2h = T1U - T1T;
Chris@82 474 T2g = T1Q - T1R;
Chris@82 475 TT = KP707106781 * (TQ - TN);
Chris@82 476 TX = TT - TW;
Chris@82 477 T1z = TW + TT;
Chris@82 478 TR = KP707106781 * (TN + TQ);
Chris@82 479 TS = TK + TR;
Chris@82 480 T1y = TK - TR;
Chris@82 481 }
Chris@82 482 { /* output stage: Cr[0], Cr[8], Cr[16], Ci[8] -- plain sums and differences, no constants */
Chris@82 483 E Tf, Tu, T27, T28, T29, T2a;
Chris@82 484 Tf = T7 + Te;
Chris@82 485 Tu = Tm + Tt;
Chris@82 486 T27 = Tf + Tu; /* sum of all sixteen R0 elements */
Chris@82 487 T28 = T1S + T1V;
Chris@82 488 T29 = T1Z + T22;
Chris@82 489 T2a = T28 + T29; /* sum of all sixteen R1 elements */
Chris@82 490 Cr[WS(csr, 8)] = Tf - Tu;
Chris@82 491 Ci[WS(csi, 8)] = T29 - T28;
Chris@82 492 Cr[WS(csr, 16)] = T27 - T2a;
Chris@82 493 Cr[0] = T27 + T2a;
Chris@82 494 }
Chris@82 495 { /* output stage: indices 4 and 12 (Cr and Ci), using KP707106781 only */
Chris@82 496 E T1P, T25, T24, T26, T1W, T23;
Chris@82 497 T1P = T7 - Te;
Chris@82 498 T25 = Tt - Tm;
Chris@82 499 T1W = T1S - T1V;
Chris@82 500 T23 = T1Z - T22;
Chris@82 501 T24 = KP707106781 * (T1W + T23);
Chris@82 502 T26 = KP707106781 * (T23 - T1W);
Chris@82 503 Cr[WS(csr, 12)] = T1P - T24;
Chris@82 504 Ci[WS(csi, 12)] = T26 - T25;
Chris@82 505 Cr[WS(csr, 4)] = T1P + T24;
Chris@82 506 Ci[WS(csi, 4)] = T25 + T26;
Chris@82 507 }
Chris@82 508 { /* output stage: indices 2, 6, 10, 14 (Cr and Ci) */
Chris@82 509 E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2n;
Chris@82 510 T2e = KP707106781 * (T2c + T2d);
Chris@82 511 T2f = T2b + T2e;
Chris@82 512 T2v = T2b - T2e;
Chris@82 513 T2n = KP707106781 * (T2d - T2c);
Chris@82 514 T2p = T2n - T2o;
Chris@82 515 T2r = T2o + T2n;
Chris@82 516 {
Chris@82 517 E T2i, T2l, T2s, T2t;
Chris@82 518 T2i = FMA(KP923879532, T2g, KP382683432 * T2h);
Chris@82 519 T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
Chris@82 520 T2m = T2i + T2l;
Chris@82 521 T2q = T2l - T2i;
Chris@82 522 T2s = FNMS(KP382683432, T2g, KP923879532 * T2h);
Chris@82 523 T2t = FMA(KP382683432, T2j, KP923879532 * T2k);
Chris@82 524 T2u = T2s + T2t;
Chris@82 525 T2w = T2t - T2s;
Chris@82 526 }
Chris@82 527 Cr[WS(csr, 14)] = T2f - T2m;
Chris@82 528 Ci[WS(csi, 14)] = T2u - T2r;
Chris@82 529 Cr[WS(csr, 2)] = T2f + T2m;
Chris@82 530 Ci[WS(csi, 2)] = T2r + T2u;
Chris@82 531 Ci[WS(csi, 6)] = T2p + T2q;
Chris@82 532 Cr[WS(csr, 6)] = T2v + T2w;
Chris@82 533 Ci[WS(csi, 10)] = T2q - T2p;
Chris@82 534 Cr[WS(csr, 10)] = T2v - T2w;
Chris@82 535 }
Chris@82 536 { /* output stage: indices 1, 7, 9, 15 (Cr and Ci) */
Chris@82 537 E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p;
Chris@82 538 {
Chris@82 539 E Tz, TG, T1q, T1r;
Chris@82 540 Tz = Tv + Ty;
Chris@82 541 TG = TC + TF;
Chris@82 542 TH = Tz + TG;
Chris@82 543 T1t = Tz - TG;
Chris@82 544 T1q = FNMS(KP195090322, TS, KP980785280 * TX);
Chris@82 545 T1r = FMA(KP195090322, T19, KP980785280 * T1e);
Chris@82 546 T1s = T1q + T1r;
Chris@82 547 T1u = T1r - T1q;
Chris@82 548 }
Chris@82 549 {
Chris@82 550 E TY, T1f, T1j, T1m;
Chris@82 551 TY = FMA(KP980785280, TS, KP195090322 * TX);
Chris@82 552 T1f = FNMS(KP195090322, T1e, KP980785280 * T19);
Chris@82 553 T1g = TY + T1f;
Chris@82 554 T1o = T1f - TY;
Chris@82 555 T1j = T1h - T1i;
Chris@82 556 T1m = T1k - T1l;
Chris@82 557 T1n = T1j - T1m;
Chris@82 558 T1p = T1m + T1j;
Chris@82 559 }
Chris@82 560 Cr[WS(csr, 15)] = TH - T1g;
Chris@82 561 Ci[WS(csi, 15)] = T1s - T1p;
Chris@82 562 Cr[WS(csr, 1)] = TH + T1g;
Chris@82 563 Ci[WS(csi, 1)] = T1p + T1s;
Chris@82 564 Ci[WS(csi, 7)] = T1n + T1o;
Chris@82 565 Cr[WS(csr, 7)] = T1t + T1u;
Chris@82 566 Ci[WS(csi, 9)] = T1o - T1n;
Chris@82 567 Cr[WS(csr, 9)] = T1t - T1u;
Chris@82 568 }
Chris@82 569 { /* output stage: indices 3, 5, 11, 13 (Cr and Ci) */
Chris@82 570 E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J;
Chris@82 571 {
Chris@82 572 E T1v, T1w, T1K, T1L;
Chris@82 573 T1v = Tv - Ty;
Chris@82 574 T1w = T1i + T1h;
Chris@82 575 T1x = T1v + T1w;
Chris@82 576 T1N = T1v - T1w;
Chris@82 577 T1K = FNMS(KP555570233, T1y, KP831469612 * T1z);
Chris@82 578 T1L = FMA(KP555570233, T1B, KP831469612 * T1C);
Chris@82 579 T1M = T1K + T1L;
Chris@82 580 T1O = T1L - T1K;
Chris@82 581 }
Chris@82 582 {
Chris@82 583 E T1A, T1D, T1F, T1G;
Chris@82 584 T1A = FMA(KP831469612, T1y, KP555570233 * T1z);
Chris@82 585 T1D = FNMS(KP555570233, T1C, KP831469612 * T1B);
Chris@82 586 T1E = T1A + T1D;
Chris@82 587 T1I = T1D - T1A;
Chris@82 588 T1F = TF - TC;
Chris@82 589 T1G = T1l + T1k;
Chris@82 590 T1H = T1F - T1G;
Chris@82 591 T1J = T1G + T1F;
Chris@82 592 }
Chris@82 593 Cr[WS(csr, 13)] = T1x - T1E;
Chris@82 594 Ci[WS(csi, 13)] = T1M - T1J;
Chris@82 595 Cr[WS(csr, 3)] = T1x + T1E;
Chris@82 596 Ci[WS(csi, 3)] = T1J + T1M;
Chris@82 597 Ci[WS(csi, 5)] = T1H + T1I;
Chris@82 598 Cr[WS(csr, 5)] = T1N + T1O;
Chris@82 599 Ci[WS(csi, 11)] = T1I - T1H;
Chris@82 600 Cr[WS(csr, 11)] = T1N - T1O;
Chris@82 601 }
Chris@82 602 }
Chris@82 603 }
Chris@82 604 }
Chris@82 605
Chris@82 606 static const kr2c_desc desc = { 32, "r2cf_32", {140, 26, 16, 0}, &GENUS }; /* descriptor passed at registration; 140/26/16 equal the addition/multiplication/fused-multiply-add counts quoted in this branch's header comment. Field meanings are set by kr2c_desc, declared outside this file. */
Chris@82 607
Chris@82 608 void X(codelet_r2cf_32) (planner *p) { /* registration entry point for the non-FMA build of this codelet */
Chris@82 609 X(kr2c_register) (p, r2cf_32, &desc); /* hands the kernel function and its descriptor to planner p */
Chris@82 610 }
Chris@82 611
Chris@82 612 #endif