annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cb_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:52 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cb_12 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 118 FP additions, 68 FP multiplications,
Chris@82 32 * (or, 72 additions, 22 multiplications, 46 fused multiply/add),
Chris@82 33 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cb_12 (FMA variant): machine-generated size-12 codelet; see the genfft
 * command line above (-n 12 -sign 1 -dif).
 *
 * For every m in [mb, me) the loop body loads six values from each of Rp, Ip,
 * Rm and Im (index 0 and WS(rs, 1..5)), combines them, and stores 24 results
 * back into the same four arrays. All loads of an iteration happen before its
 * first store.
 *
 * Rp[0] and Rm[0] are stored without a twiddle multiplication; each of the
 * remaining eleven output pairs is combined with one pair W[2k], W[2k+1]
 * (k = 0..10).
 *
 * Per iteration Rp and Ip advance by ms, Rm and Im move back by ms, and W
 * advances by 22. Before the first iteration W is offset by (mb - 1) * 22.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * come from the included FFTW headers and are not visible in this file;
 * FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c / c - a*b -- confirm.
 */
Chris@82 37 static void hc2cb_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Constants: 0.866025... is numerically sqrt(3)/2; the other is exactly 1/2. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
/* Intermediates that survive from the load stages into the output stages. */
Chris@82 44 E T18, T20, T1b, T21, T1s, T2a, T1p, T29, TI, TN, TO, Tb, To, T1f, T23;
Chris@82 45 E T1i, T24, T1z, T2d, T1w, T2c, Tt, Ty, Tz, Tm, TD;
/* First load group: Rp[0], Ip[0], Rm[5], Im[5], Rp[4], Rm[3], Ip[4], Im[3], Rm[1], Rp[2], Ip[2], Im[1]. */
Chris@82 46 {
Chris@82 47 E T1, TE, T6, TM, T4, T1o, TH, T17, T9, T1r, TL, T1a;
Chris@82 48 T1 = Rp[0];
Chris@82 49 TE = Ip[0];
Chris@82 50 T6 = Rm[WS(rs, 5)];
Chris@82 51 TM = Im[WS(rs, 5)];
Chris@82 52 {
Chris@82 53 E T2, T3, TF, TG;
Chris@82 54 T2 = Rp[WS(rs, 4)];
Chris@82 55 T3 = Rm[WS(rs, 3)];
Chris@82 56 T4 = T2 + T3;
Chris@82 57 T1o = T2 - T3;
Chris@82 58 TF = Ip[WS(rs, 4)];
Chris@82 59 TG = Im[WS(rs, 3)];
Chris@82 60 TH = TF - TG;
Chris@82 61 T17 = TF + TG;
Chris@82 62 }
Chris@82 63 {
Chris@82 64 E T7, T8, TJ, TK;
Chris@82 65 T7 = Rm[WS(rs, 1)];
Chris@82 66 T8 = Rp[WS(rs, 2)];
Chris@82 67 T9 = T7 + T8;
Chris@82 68 T1r = T7 - T8;
Chris@82 69 TJ = Ip[WS(rs, 2)];
Chris@82 70 TK = Im[WS(rs, 1)];
Chris@82 71 TL = TJ - TK;
Chris@82 72 T1a = TJ + TK;
Chris@82 73 }
/* Combine the first group's sums/differences with the two constants. */
Chris@82 74 {
Chris@82 75 E T16, T19, T1q, T1n, T5, Ta;
Chris@82 76 T16 = FNMS(KP500000000, T4, T1);
Chris@82 77 T18 = FNMS(KP866025403, T17, T16);
Chris@82 78 T20 = FMA(KP866025403, T17, T16);
Chris@82 79 T19 = FNMS(KP500000000, T9, T6);
Chris@82 80 T1b = FMA(KP866025403, T1a, T19);
Chris@82 81 T21 = FNMS(KP866025403, T1a, T19);
Chris@82 82 T1q = FMA(KP500000000, TL, TM);
Chris@82 83 T1s = FNMS(KP866025403, T1r, T1q);
Chris@82 84 T2a = FMA(KP866025403, T1r, T1q);
Chris@82 85 T1n = FNMS(KP500000000, TH, TE);
Chris@82 86 T1p = FMA(KP866025403, T1o, T1n);
Chris@82 87 T29 = FNMS(KP866025403, T1o, T1n);
Chris@82 88 TI = TE + TH;
Chris@82 89 TN = TL - TM;
Chris@82 90 TO = TI - TN;
Chris@82 91 T5 = T1 + T4;
Chris@82 92 Ta = T6 + T9;
Chris@82 93 Tb = T5 + Ta;
Chris@82 94 To = T5 - Ta;
Chris@82 95 }
Chris@82 96 }
/* Second load group: Rp[3], Ip[3], Rm[2], Im[2], Rm[4], Rm[0], Im[4], Im[0], Rp[1], Rp[5], Ip[1], Ip[5]. */
Chris@82 97 {
Chris@82 98 E Tc, Tp, Th, Tx, Tf, T1v, Ts, T1e, Tk, T1y, Tw, T1h;
Chris@82 99 Tc = Rp[WS(rs, 3)];
Chris@82 100 Tp = Ip[WS(rs, 3)];
Chris@82 101 Th = Rm[WS(rs, 2)];
Chris@82 102 Tx = Im[WS(rs, 2)];
Chris@82 103 {
Chris@82 104 E Td, Te, Tq, Tr;
Chris@82 105 Td = Rm[WS(rs, 4)];
Chris@82 106 Te = Rm[0];
Chris@82 107 Tf = Td + Te;
Chris@82 108 T1v = Td - Te;
Chris@82 109 Tq = Im[WS(rs, 4)];
Chris@82 110 Tr = Im[0];
Chris@82 111 Ts = Tq + Tr;
Chris@82 112 T1e = Tq - Tr;
Chris@82 113 }
Chris@82 114 {
Chris@82 115 E Ti, Tj, Tu, Tv;
Chris@82 116 Ti = Rp[WS(rs, 1)];
Chris@82 117 Tj = Rp[WS(rs, 5)];
Chris@82 118 Tk = Ti + Tj;
Chris@82 119 T1y = Ti - Tj;
Chris@82 120 Tu = Ip[WS(rs, 1)];
Chris@82 121 Tv = Ip[WS(rs, 5)];
Chris@82 122 Tw = Tu + Tv;
Chris@82 123 T1h = Tv - Tu;
Chris@82 124 }
/* Combine the second group's sums/differences with the two constants. */
Chris@82 125 {
Chris@82 126 E T1d, T1g, T1x, T1u, Tg, Tl;
Chris@82 127 T1d = FNMS(KP500000000, Tf, Tc);
Chris@82 128 T1f = FMA(KP866025403, T1e, T1d);
Chris@82 129 T23 = FNMS(KP866025403, T1e, T1d);
Chris@82 130 T1g = FNMS(KP500000000, Tk, Th);
Chris@82 131 T1i = FMA(KP866025403, T1h, T1g);
Chris@82 132 T24 = FNMS(KP866025403, T1h, T1g);
Chris@82 133 T1x = FMA(KP500000000, Tw, Tx);
Chris@82 134 T1z = FNMS(KP866025403, T1y, T1x);
Chris@82 135 T2d = FMA(KP866025403, T1y, T1x);
Chris@82 136 T1u = FMA(KP500000000, Ts, Tp);
Chris@82 137 T1w = FMA(KP866025403, T1v, T1u);
Chris@82 138 T2c = FNMS(KP866025403, T1v, T1u);
Chris@82 139 Tt = Tp - Ts;
Chris@82 140 Ty = Tw - Tx;
Chris@82 141 Tz = Tt - Ty;
Chris@82 142 Tg = Tc + Tf;
Chris@82 143 Tl = Th + Tk;
Chris@82 144 Tm = Tg + Tl;
Chris@82 145 TD = Tg - Tl;
Chris@82 146 }
Chris@82 147 }
/* First store of the iteration; Rp[0] gets no twiddle multiplication. */
Chris@82 148 Rp[0] = Tb + Tm;
/* Ip[WS(rs, 4)] / Im[WS(rs, 4)] use twiddle pair W[16], W[17]. */
Chris@82 149 {
Chris@82 150 E TA, TP, TB, TQ, Tn, TC;
Chris@82 151 TA = To - Tz;
Chris@82 152 TP = TD + TO;
Chris@82 153 Tn = W[16];
Chris@82 154 TB = Tn * TA;
Chris@82 155 TQ = Tn * TP;
Chris@82 156 TC = W[17];
Chris@82 157 Ip[WS(rs, 4)] = FNMS(TC, TP, TB);
Chris@82 158 Im[WS(rs, 4)] = FMA(TC, TA, TQ);
Chris@82 159 }
/* Ip[WS(rs, 1)] / Im[WS(rs, 1)] use twiddle pair W[4], W[5]. */
Chris@82 160 {
Chris@82 161 E TS, TV, TT, TW, TR, TU;
Chris@82 162 TS = To + Tz;
Chris@82 163 TV = TO - TD;
Chris@82 164 TR = W[4];
Chris@82 165 TT = TR * TS;
Chris@82 166 TW = TR * TV;
Chris@82 167 TU = W[5];
Chris@82 168 Ip[WS(rs, 1)] = FNMS(TU, TV, TT);
Chris@82 169 Im[WS(rs, 1)] = FMA(TU, TS, TW);
Chris@82 170 }
/* Rm[0] is stored without a twiddle; Rp[WS(rs, 3)] / Rm[WS(rs, 3)] use W[10], W[11]. */
Chris@82 171 {
Chris@82 172 E T11, T12, T13, TX, TZ, T10, T14, TY;
Chris@82 173 T11 = TI + TN;
Chris@82 174 T12 = Tt + Ty;
Chris@82 175 T13 = T11 - T12;
Chris@82 176 TY = Tb - Tm;
Chris@82 177 TX = W[10];
Chris@82 178 TZ = TX * TY;
Chris@82 179 T10 = W[11];
Chris@82 180 T14 = T10 * TY;
Chris@82 181 Rm[0] = T11 + T12;
Chris@82 182 Rm[WS(rs, 3)] = FMA(TX, T13, T14);
Chris@82 183 Rp[WS(rs, 3)] = FNMS(T10, T13, TZ);
Chris@82 184 }
/* Rp/Rm at WS(rs, 5) use W[18], W[19]; Rp/Rm at WS(rs, 2) use W[6], W[7]. */
Chris@82 185 {
Chris@82 186 E T1k, T1E, T1B, T1H;
Chris@82 187 {
Chris@82 188 E T1c, T1j, T1t, T1A;
Chris@82 189 T1c = T18 + T1b;
Chris@82 190 T1j = T1f + T1i;
Chris@82 191 T1k = T1c - T1j;
Chris@82 192 T1E = T1c + T1j;
Chris@82 193 T1t = T1p - T1s;
Chris@82 194 T1A = T1w - T1z;
Chris@82 195 T1B = T1t - T1A;
Chris@82 196 T1H = T1t + T1A;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T15, T1l, T1m, T1C;
Chris@82 200 T15 = W[18];
Chris@82 201 T1l = T15 * T1k;
Chris@82 202 T1m = W[19];
Chris@82 203 T1C = T1m * T1k;
Chris@82 204 Rp[WS(rs, 5)] = FNMS(T1m, T1B, T1l);
Chris@82 205 Rm[WS(rs, 5)] = FMA(T15, T1B, T1C);
Chris@82 206 }
Chris@82 207 {
Chris@82 208 E T1D, T1F, T1G, T1I;
Chris@82 209 T1D = W[6];
Chris@82 210 T1F = T1D * T1E;
Chris@82 211 T1G = W[7];
Chris@82 212 T1I = T1G * T1E;
Chris@82 213 Rp[WS(rs, 2)] = FNMS(T1G, T1H, T1F);
Chris@82 214 Rm[WS(rs, 2)] = FMA(T1D, T1H, T1I);
Chris@82 215 }
Chris@82 216 }
/* Rp/Rm at WS(rs, 1) use W[2], W[3]; Rp/Rm at WS(rs, 4) use W[14], W[15]. */
Chris@82 217 {
Chris@82 218 E T26, T2i, T2f, T2l;
Chris@82 219 {
Chris@82 220 E T22, T25, T2b, T2e;
Chris@82 221 T22 = T20 + T21;
Chris@82 222 T25 = T23 + T24;
Chris@82 223 T26 = T22 - T25;
Chris@82 224 T2i = T22 + T25;
Chris@82 225 T2b = T29 - T2a;
Chris@82 226 T2e = T2c - T2d;
Chris@82 227 T2f = T2b - T2e;
Chris@82 228 T2l = T2b + T2e;
Chris@82 229 }
Chris@82 230 {
Chris@82 231 E T1Z, T27, T28, T2g;
Chris@82 232 T1Z = W[2];
Chris@82 233 T27 = T1Z * T26;
Chris@82 234 T28 = W[3];
Chris@82 235 T2g = T28 * T26;
Chris@82 236 Rp[WS(rs, 1)] = FNMS(T28, T2f, T27);
Chris@82 237 Rm[WS(rs, 1)] = FMA(T1Z, T2f, T2g);
Chris@82 238 }
Chris@82 239 {
Chris@82 240 E T2h, T2j, T2k, T2m;
Chris@82 241 T2h = W[14];
Chris@82 242 T2j = T2h * T2i;
Chris@82 243 T2k = W[15];
Chris@82 244 T2m = T2k * T2i;
Chris@82 245 Rp[WS(rs, 4)] = FNMS(T2k, T2l, T2j);
Chris@82 246 Rm[WS(rs, 4)] = FMA(T2h, T2l, T2m);
Chris@82 247 }
Chris@82 248 }
/* Ip/Im at WS(rs, 2) use W[8], W[9]; Ip/Im at WS(rs, 5) use W[20], W[21]. */
Chris@82 249 {
Chris@82 250 E T2q, T2y, T2v, T2B;
Chris@82 251 {
Chris@82 252 E T2o, T2p, T2t, T2u;
Chris@82 253 T2o = T20 - T21;
Chris@82 254 T2p = T2c + T2d;
Chris@82 255 T2q = T2o - T2p;
Chris@82 256 T2y = T2o + T2p;
Chris@82 257 T2t = T29 + T2a;
Chris@82 258 T2u = T23 - T24;
Chris@82 259 T2v = T2t + T2u;
Chris@82 260 T2B = T2t - T2u;
Chris@82 261 }
Chris@82 262 {
Chris@82 263 E T2r, T2w, T2n, T2s;
Chris@82 264 T2n = W[8];
Chris@82 265 T2r = T2n * T2q;
Chris@82 266 T2w = T2n * T2v;
Chris@82 267 T2s = W[9];
Chris@82 268 Ip[WS(rs, 2)] = FNMS(T2s, T2v, T2r);
Chris@82 269 Im[WS(rs, 2)] = FMA(T2s, T2q, T2w);
Chris@82 270 }
Chris@82 271 {
Chris@82 272 E T2z, T2C, T2x, T2A;
Chris@82 273 T2x = W[20];
Chris@82 274 T2z = T2x * T2y;
Chris@82 275 T2C = T2x * T2B;
Chris@82 276 T2A = W[21];
Chris@82 277 Ip[WS(rs, 5)] = FNMS(T2A, T2B, T2z);
Chris@82 278 Im[WS(rs, 5)] = FMA(T2A, T2y, T2C);
Chris@82 279 }
Chris@82 280 }
/* Ip[0] / Im[0] use W[0], W[1]; Ip/Im at WS(rs, 3) use W[12], W[13]. */
Chris@82 281 {
Chris@82 282 E T1M, T1U, T1R, T1X;
Chris@82 283 {
Chris@82 284 E T1K, T1L, T1P, T1Q;
Chris@82 285 T1K = T18 - T1b;
Chris@82 286 T1L = T1w + T1z;
Chris@82 287 T1M = T1K - T1L;
Chris@82 288 T1U = T1K + T1L;
Chris@82 289 T1P = T1p + T1s;
Chris@82 290 T1Q = T1f - T1i;
Chris@82 291 T1R = T1P + T1Q;
Chris@82 292 T1X = T1P - T1Q;
Chris@82 293 }
Chris@82 294 {
Chris@82 295 E T1N, T1S, T1J, T1O;
Chris@82 296 T1J = W[0];
Chris@82 297 T1N = T1J * T1M;
Chris@82 298 T1S = T1J * T1R;
Chris@82 299 T1O = W[1];
Chris@82 300 Ip[0] = FNMS(T1O, T1R, T1N);
Chris@82 301 Im[0] = FMA(T1O, T1M, T1S);
Chris@82 302 }
Chris@82 303 {
Chris@82 304 E T1V, T1Y, T1T, T1W;
Chris@82 305 T1T = W[12];
Chris@82 306 T1V = T1T * T1U;
Chris@82 307 T1Y = T1T * T1X;
Chris@82 308 T1W = W[13];
Chris@82 309 Ip[WS(rs, 3)] = FNMS(T1W, T1X, T1V);
Chris@82 310 Im[WS(rs, 3)] = FMA(T1W, T1U, T1Y);
Chris@82 311 }
Chris@82 312 }
Chris@82 313 }
Chris@82 314 }
Chris@82 315 }
Chris@82 316
/*
 * Twiddle instruction table for this codelet; it is passed to the planner
 * through the `desc` initializer in this file.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined in FFTW headers not
 * shown here. Presumably {TW_FULL, 1, 12} asks for the full twiddle set of a
 * size-12 transform (11 pairs = 22 reals, which would match the W stride of 22
 * used by hc2cb_12) and {TW_NEXT, 1, 0} terminates the list -- confirm.
 */
Chris@82 317 static const tw_instr twinstr[] = {
Chris@82 318 {TW_FULL, 1, 12},
Chris@82 319 {TW_NEXT, 1, 0}
Chris@82 320 };
Chris@82 321
/*
 * Codelet descriptor handed to the registration call: size 12, the codelet
 * name string, the twiddle table, the GENUS object, and an operation-count
 * tuple. {72, 22, 46, 0} matches the "72 additions, 22 multiplications,
 * 46 fused multiply/add" figures in the generator comment of this FMA branch.
 * NOTE(review): hc2c_desc field meanings and GENUS come from FFTW headers
 * (GENUS presumably via rdft/scalar/hc2cb.h) -- confirm there.
 */
Chris@82 322 static const hc2c_desc desc = { 12, "hc2cb_12", twinstr, &GENUS, {72, 22, 46, 0} };
Chris@82 323
/*
 * Registration entry point: passes the hc2cb_12 kernel and its descriptor to
 * X(khc2c_register) on planner `p`, with the HC2C_VIA_RDFT argument.
 * NOTE(review): X() is a macro from FFTW headers, presumably a name-mangling
 * prefix; the meaning of HC2C_VIA_RDFT is not visible here -- confirm.
 */
Chris@82 324 void X(codelet_hc2cb_12) (planner *p) {
Chris@82 325 X(khc2c_register) (p, hc2cb_12, &desc, HC2C_VIA_RDFT);
Chris@82 326 }
Chris@82 327 #else
Chris@82 328
Chris@82 329 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cb_12 -include rdft/scalar/hc2cb.h */
Chris@82 330
Chris@82 331 /*
Chris@82 332 * This function contains 118 FP additions, 60 FP multiplications,
Chris@82 333 * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 334 * 39 stack variables, 2 constants, and 48 memory accesses
Chris@82 335 */
Chris@82 336 #include "rdft/scalar/hc2cb.h"
Chris@82 337
/*
 * hc2cb_12 (non-FMA variant): machine-generated size-12 codelet; see the
 * genfft command line above (-n 12 -sign 1 -dif, without -fma).
 *
 * For every m in [mb, me) the loop body loads six values from each of Rp, Ip,
 * Rm and Im (index 0 and WS(rs, 1..5)), combines them, and stores 24 results
 * back into the same four arrays. All loads of an iteration happen before its
 * first store.
 *
 * Rp[0] and Rm[0] are stored without a twiddle multiplication; each of the
 * remaining eleven output pairs is combined with one pair W[2k], W[2k+1]
 * (k = 0..10).
 *
 * Unlike the FMA branch, the 0.866... constant is applied here with explicit
 * multiplications in the load stages.
 *
 * Per iteration Rp and Ip advance by ms, Rm and Im move back by ms, and W
 * advances by 22. Before the first iteration W is offset by (mb - 1) * 22.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * come from the included FFTW headers and are not visible in this file;
 * FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c / c - a*b -- confirm.
 */
Chris@82 338 static void hc2cb_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 339 {
/* Constants: exactly 1/2, and 0.866025... which is numerically sqrt(3)/2. */
Chris@82 340 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 341 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 342 {
Chris@82 343 INT m;
Chris@82 344 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
/* Intermediates that survive from the four load stages into the output stages. */
Chris@82 345 E T5, TH, T12, T1M, T1i, T1U, Tl, Ty, T1c, T1Y, T1s, T1Q, Ta, TM, T15;
Chris@82 346 E T1N, T1l, T1V, Tg, Tt, T19, T1X, T1p, T1P;
/* Load stage 1: Rp[0], Ip[0], Rp[4], Rm[3], Ip[4], Im[3]. */
Chris@82 347 {
Chris@82 348 E T1, TD, T4, T1g, TG, T11, T10, T1h;
Chris@82 349 T1 = Rp[0];
Chris@82 350 TD = Ip[0];
Chris@82 351 {
Chris@82 352 E T2, T3, TE, TF;
Chris@82 353 T2 = Rp[WS(rs, 4)];
Chris@82 354 T3 = Rm[WS(rs, 3)];
Chris@82 355 T4 = T2 + T3;
Chris@82 356 T1g = KP866025403 * (T2 - T3);
Chris@82 357 TE = Ip[WS(rs, 4)];
Chris@82 358 TF = Im[WS(rs, 3)];
Chris@82 359 TG = TE - TF;
Chris@82 360 T11 = KP866025403 * (TE + TF);
Chris@82 361 }
Chris@82 362 T5 = T1 + T4;
Chris@82 363 TH = TD + TG;
Chris@82 364 T10 = FNMS(KP500000000, T4, T1);
Chris@82 365 T12 = T10 - T11;
Chris@82 366 T1M = T10 + T11;
Chris@82 367 T1h = FNMS(KP500000000, TG, TD);
Chris@82 368 T1i = T1g + T1h;
Chris@82 369 T1U = T1h - T1g;
Chris@82 370 }
/* Load stage 2: Rm[2], Im[2], Rp[1], Rp[5], Ip[1], Ip[5]. */
Chris@82 371 {
Chris@82 372 E Th, Tx, Tk, T1a, Tw, T1r, T1b, T1q;
Chris@82 373 Th = Rm[WS(rs, 2)];
Chris@82 374 Tx = Im[WS(rs, 2)];
Chris@82 375 {
Chris@82 376 E Ti, Tj, Tu, Tv;
Chris@82 377 Ti = Rp[WS(rs, 1)];
Chris@82 378 Tj = Rp[WS(rs, 5)];
Chris@82 379 Tk = Ti + Tj;
Chris@82 380 T1a = KP866025403 * (Ti - Tj);
Chris@82 381 Tu = Ip[WS(rs, 1)];
Chris@82 382 Tv = Ip[WS(rs, 5)];
Chris@82 383 Tw = Tu + Tv;
Chris@82 384 T1r = KP866025403 * (Tv - Tu);
Chris@82 385 }
Chris@82 386 Tl = Th + Tk;
Chris@82 387 Ty = Tw - Tx;
Chris@82 388 T1b = FMA(KP500000000, Tw, Tx);
Chris@82 389 T1c = T1a - T1b;
Chris@82 390 T1Y = T1a + T1b;
Chris@82 391 T1q = FNMS(KP500000000, Tk, Th);
Chris@82 392 T1s = T1q + T1r;
Chris@82 393 T1Q = T1q - T1r;
Chris@82 394 }
/* Load stage 3: Rm[5], Im[5], Rm[1], Rp[2], Ip[2], Im[1]. */
Chris@82 395 {
Chris@82 396 E T6, TL, T9, T1j, TK, T14, T13, T1k;
Chris@82 397 T6 = Rm[WS(rs, 5)];
Chris@82 398 TL = Im[WS(rs, 5)];
Chris@82 399 {
Chris@82 400 E T7, T8, TI, TJ;
Chris@82 401 T7 = Rm[WS(rs, 1)];
Chris@82 402 T8 = Rp[WS(rs, 2)];
Chris@82 403 T9 = T7 + T8;
Chris@82 404 T1j = KP866025403 * (T7 - T8);
Chris@82 405 TI = Ip[WS(rs, 2)];
Chris@82 406 TJ = Im[WS(rs, 1)];
Chris@82 407 TK = TI - TJ;
Chris@82 408 T14 = KP866025403 * (TI + TJ);
Chris@82 409 }
Chris@82 410 Ta = T6 + T9;
Chris@82 411 TM = TK - TL;
Chris@82 412 T13 = FNMS(KP500000000, T9, T6);
Chris@82 413 T15 = T13 + T14;
Chris@82 414 T1N = T13 - T14;
Chris@82 415 T1k = FMA(KP500000000, TK, TL);
Chris@82 416 T1l = T1j - T1k;
Chris@82 417 T1V = T1j + T1k;
Chris@82 418 }
/* Load stage 4: Rp[3], Ip[3], Rm[4], Rm[0], Im[4], Im[0]. */
Chris@82 419 {
Chris@82 420 E Tc, Tp, Tf, T17, Ts, T1o, T18, T1n;
Chris@82 421 Tc = Rp[WS(rs, 3)];
Chris@82 422 Tp = Ip[WS(rs, 3)];
Chris@82 423 {
Chris@82 424 E Td, Te, Tq, Tr;
Chris@82 425 Td = Rm[WS(rs, 4)];
Chris@82 426 Te = Rm[0];
Chris@82 427 Tf = Td + Te;
Chris@82 428 T17 = KP866025403 * (Td - Te);
Chris@82 429 Tq = Im[WS(rs, 4)];
Chris@82 430 Tr = Im[0];
Chris@82 431 Ts = Tq + Tr;
Chris@82 432 T1o = KP866025403 * (Tq - Tr);
Chris@82 433 }
Chris@82 434 Tg = Tc + Tf;
Chris@82 435 Tt = Tp - Ts;
Chris@82 436 T18 = FMA(KP500000000, Ts, Tp);
Chris@82 437 T19 = T17 + T18;
Chris@82 438 T1X = T18 - T17;
Chris@82 439 T1n = FNMS(KP500000000, Tf, Tc);
Chris@82 440 T1p = T1n + T1o;
Chris@82 441 T1P = T1n - T1o;
Chris@82 442 }
/* First stores: Rp[0] and Rm[0] get no twiddle; Rp/Rm at WS(rs, 3) use W[10], W[11]. */
Chris@82 443 {
Chris@82 444 E Tb, Tm, TU, TW, TX, TY, TT, TV;
Chris@82 445 Tb = T5 + Ta;
Chris@82 446 Tm = Tg + Tl;
Chris@82 447 TU = Tb - Tm;
Chris@82 448 TW = TH + TM;
Chris@82 449 TX = Tt + Ty;
Chris@82 450 TY = TW - TX;
Chris@82 451 Rp[0] = Tb + Tm;
Chris@82 452 Rm[0] = TW + TX;
Chris@82 453 TT = W[10];
Chris@82 454 TV = W[11];
Chris@82 455 Rp[WS(rs, 3)] = FNMS(TV, TY, TT * TU);
Chris@82 456 Rm[WS(rs, 3)] = FMA(TV, TU, TT * TY);
Chris@82 457 }
/* Ip/Im at WS(rs, 4) use W[16], W[17]; Ip/Im at WS(rs, 1) use W[4], W[5]. */
Chris@82 458 {
Chris@82 459 E TA, TQ, TO, TS;
Chris@82 460 {
Chris@82 461 E To, Tz, TC, TN;
Chris@82 462 To = T5 - Ta;
Chris@82 463 Tz = Tt - Ty;
Chris@82 464 TA = To - Tz;
Chris@82 465 TQ = To + Tz;
Chris@82 466 TC = Tg - Tl;
Chris@82 467 TN = TH - TM;
Chris@82 468 TO = TC + TN;
Chris@82 469 TS = TN - TC;
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E Tn, TB, TP, TR;
Chris@82 473 Tn = W[16];
Chris@82 474 TB = W[17];
Chris@82 475 Ip[WS(rs, 4)] = FNMS(TB, TO, Tn * TA);
Chris@82 476 Im[WS(rs, 4)] = FMA(Tn, TO, TB * TA);
Chris@82 477 TP = W[4];
Chris@82 478 TR = W[5];
Chris@82 479 Ip[WS(rs, 1)] = FNMS(TR, TS, TP * TQ);
Chris@82 480 Im[WS(rs, 1)] = FMA(TP, TS, TR * TQ);
Chris@82 481 }
Chris@82 482 }
/* Ip/Im at WS(rs, 2) use W[8], W[9]; Ip/Im at WS(rs, 5) use W[20], W[21]. */
Chris@82 483 {
Chris@82 484 E T28, T2e, T2c, T2g;
Chris@82 485 {
Chris@82 486 E T26, T27, T2a, T2b;
Chris@82 487 T26 = T1M - T1N;
Chris@82 488 T27 = T1X + T1Y;
Chris@82 489 T28 = T26 - T27;
Chris@82 490 T2e = T26 + T27;
Chris@82 491 T2a = T1U + T1V;
Chris@82 492 T2b = T1P - T1Q;
Chris@82 493 T2c = T2a + T2b;
Chris@82 494 T2g = T2a - T2b;
Chris@82 495 }
Chris@82 496 {
Chris@82 497 E T25, T29, T2d, T2f;
Chris@82 498 T25 = W[8];
Chris@82 499 T29 = W[9];
Chris@82 500 Ip[WS(rs, 2)] = FNMS(T29, T2c, T25 * T28);
Chris@82 501 Im[WS(rs, 2)] = FMA(T25, T2c, T29 * T28);
Chris@82 502 T2d = W[20];
Chris@82 503 T2f = W[21];
Chris@82 504 Ip[WS(rs, 5)] = FNMS(T2f, T2g, T2d * T2e);
Chris@82 505 Im[WS(rs, 5)] = FMA(T2d, T2g, T2f * T2e);
Chris@82 506 }
Chris@82 507 }
/* Rp/Rm at WS(rs, 1) use W[2], W[3]; Rp/Rm at WS(rs, 4) use W[14], W[15]. */
Chris@82 508 {
Chris@82 509 E T1S, T22, T20, T24;
Chris@82 510 {
Chris@82 511 E T1O, T1R, T1W, T1Z;
Chris@82 512 T1O = T1M + T1N;
Chris@82 513 T1R = T1P + T1Q;
Chris@82 514 T1S = T1O - T1R;
Chris@82 515 T22 = T1O + T1R;
Chris@82 516 T1W = T1U - T1V;
Chris@82 517 T1Z = T1X - T1Y;
Chris@82 518 T20 = T1W - T1Z;
Chris@82 519 T24 = T1W + T1Z;
Chris@82 520 }
Chris@82 521 {
Chris@82 522 E T1L, T1T, T21, T23;
Chris@82 523 T1L = W[2];
Chris@82 524 T1T = W[3];
Chris@82 525 Rp[WS(rs, 1)] = FNMS(T1T, T20, T1L * T1S);
Chris@82 526 Rm[WS(rs, 1)] = FMA(T1T, T1S, T1L * T20);
Chris@82 527 T21 = W[14];
Chris@82 528 T23 = W[15];
Chris@82 529 Rp[WS(rs, 4)] = FNMS(T23, T24, T21 * T22);
Chris@82 530 Rm[WS(rs, 4)] = FMA(T23, T22, T21 * T24);
Chris@82 531 }
Chris@82 532 }
/* Rp/Rm at WS(rs, 5) use W[18], W[19]; Rp/Rm at WS(rs, 2) use W[6], W[7]. */
Chris@82 533 {
Chris@82 534 E T1C, T1I, T1G, T1K;
Chris@82 535 {
Chris@82 536 E T1A, T1B, T1E, T1F;
Chris@82 537 T1A = T12 + T15;
Chris@82 538 T1B = T1p + T1s;
Chris@82 539 T1C = T1A - T1B;
Chris@82 540 T1I = T1A + T1B;
Chris@82 541 T1E = T1i + T1l;
Chris@82 542 T1F = T19 + T1c;
Chris@82 543 T1G = T1E - T1F;
Chris@82 544 T1K = T1E + T1F;
Chris@82 545 }
Chris@82 546 {
Chris@82 547 E T1z, T1D, T1H, T1J;
Chris@82 548 T1z = W[18];
Chris@82 549 T1D = W[19];
Chris@82 550 Rp[WS(rs, 5)] = FNMS(T1D, T1G, T1z * T1C);
Chris@82 551 Rm[WS(rs, 5)] = FMA(T1D, T1C, T1z * T1G);
Chris@82 552 T1H = W[6];
Chris@82 553 T1J = W[7];
Chris@82 554 Rp[WS(rs, 2)] = FNMS(T1J, T1K, T1H * T1I);
Chris@82 555 Rm[WS(rs, 2)] = FMA(T1J, T1I, T1H * T1K);
Chris@82 556 }
Chris@82 557 }
/* Ip[0] / Im[0] use W[0], W[1]; Ip/Im at WS(rs, 3) use W[12], W[13]. */
Chris@82 558 {
Chris@82 559 E T1e, T1w, T1u, T1y;
Chris@82 560 {
Chris@82 561 E T16, T1d, T1m, T1t;
Chris@82 562 T16 = T12 - T15;
Chris@82 563 T1d = T19 - T1c;
Chris@82 564 T1e = T16 - T1d;
Chris@82 565 T1w = T16 + T1d;
Chris@82 566 T1m = T1i - T1l;
Chris@82 567 T1t = T1p - T1s;
Chris@82 568 T1u = T1m + T1t;
Chris@82 569 T1y = T1m - T1t;
Chris@82 570 }
Chris@82 571 {
Chris@82 572 E TZ, T1f, T1v, T1x;
Chris@82 573 TZ = W[0];
Chris@82 574 T1f = W[1];
Chris@82 575 Ip[0] = FNMS(T1f, T1u, TZ * T1e);
Chris@82 576 Im[0] = FMA(TZ, T1u, T1f * T1e);
Chris@82 577 T1v = W[12];
Chris@82 578 T1x = W[13];
Chris@82 579 Ip[WS(rs, 3)] = FNMS(T1x, T1y, T1v * T1w);
Chris@82 580 Im[WS(rs, 3)] = FMA(T1v, T1y, T1x * T1w);
Chris@82 581 }
Chris@82 582 }
Chris@82 583 }
Chris@82 584 }
Chris@82 585 }
Chris@82 586
/*
 * Twiddle instruction table for the non-FMA build of this codelet; it is
 * passed to the planner through the `desc` initializer in this file.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined in FFTW headers not
 * shown here. Presumably {TW_FULL, 1, 12} asks for the full twiddle set of a
 * size-12 transform (11 pairs = 22 reals, which would match the W stride of 22
 * used by hc2cb_12) and {TW_NEXT, 1, 0} terminates the list -- confirm.
 */
Chris@82 587 static const tw_instr twinstr[] = {
Chris@82 588 {TW_FULL, 1, 12},
Chris@82 589 {TW_NEXT, 1, 0}
Chris@82 590 };
Chris@82 591
/*
 * Codelet descriptor handed to the registration call: size 12, the codelet
 * name string, the twiddle table, the GENUS object, and an operation-count
 * tuple. {88, 30, 30, 0} matches the "88 additions, 30 multiplications,
 * 30 fused multiply/add" figures in the generator comment of this non-FMA
 * branch.
 * NOTE(review): hc2c_desc field meanings and GENUS come from FFTW headers
 * (GENUS presumably via rdft/scalar/hc2cb.h) -- confirm there.
 */
Chris@82 592 static const hc2c_desc desc = { 12, "hc2cb_12", twinstr, &GENUS, {88, 30, 30, 0} };
Chris@82 593
/*
 * Registration entry point (non-FMA build): passes the hc2cb_12 kernel and
 * its descriptor to X(khc2c_register) on planner `p`, with the HC2C_VIA_RDFT
 * argument.
 * NOTE(review): X() is a macro from FFTW headers, presumably a name-mangling
 * prefix; the meaning of HC2C_VIA_RDFT is not visible here -- confirm.
 */
Chris@82 594 void X(codelet_hc2cb_12) (planner *p) {
Chris@82 595 X(khc2c_register) (p, hc2cb_12, &desc, HC2C_VIA_RDFT);
Chris@82 596 }
Chris@82 597 #endif