annotate src/fftw-3.3.5/rdft/scalar/r2cb/hc2cbdft_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:51:58 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 142 FP additions, 68 FP multiplications,
Chris@42 32 * (or, 96 additions, 22 multiplications, 46 fused multiply/add),
Chris@42 33 * 81 stack variables, 2 constants, and 48 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cb.h"
Chris@42 36
/*
 * hc2cbdft_12 -- variant compiled when HAVE_FMA is defined.
 *
 * Machine-generated size-12 codelet (see the genfft command line in the
 * comment above).  For every m in [mb, me) it
 *   - loads 24 values: Rp, Ip, Rm and Im at WS(rs, 0) .. WS(rs, 5);
 *   - combines them with the 22 twiddle values W[0] .. W[21];
 *   - stores 24 results back to the same 24 locations.
 * All loads of an iteration are issued before its first store.
 *
 * Parameters:
 *   Rp, Ip - arrays stepped by +ms after each iteration
 *   Rm, Im - arrays stepped by -ms after each iteration
 *   W      - twiddle table; offset once by (mb - 1) * 22, then stepped
 *            by 22 after each iteration
 *   rs     - stride passed to WS() to address the six elements per array
 *   mb, me - half-open iteration range
 *   ms     - pointer step between consecutive iterations
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * provided by the included headers and are not defined in this file.
 * NOTE(review): the statement order was chosen by the generator
 * (-reorder-insns -schedule-for-pipeline); do not hand-edit.
 */
Chris@42 37 static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numeric constants: 0.8660254... equals sqrt(3)/2 to the printed precision; the other is 1/2. */
Chris@42 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 41 {
Chris@42 42 INT m;
/* One pass per m; the increment clause steps all four data pointers and the twiddle pointer. */
Chris@42 43 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
Chris@42 44 E T2S, T2V, T2w, T2Z, T2T, T2I, T2Q, T2Y, T2U, T2K, T2G, T30, T2W;
Chris@42 45 {
Chris@42 46 E Tb, T1Z, T2D, T1E, T1N, T2y, TD, T2t, T1U, T1e, T2o, TY, T1f, TI, T1g;
Chris@42 47 E TN, Tm, T1V, T2z, T1H, T1Q, T2E, T19, T2u;
Chris@42 48 {
Chris@42 49 E T1c, TU, T1d, TX;
Chris@42 50 {
Chris@42 51 E Tu, T6, TT, TS, T5, Tt, Tw, Tx, TB, T9, Ty;
Chris@42 52 {
Chris@42 53 E T1, Tp, Tq, Tr, T4, T2, T3, T7, T8, Ts;
/* First group of 12 loads: Rp[0,4,2], Ip[0,4,2], Rm[3,5,1], Im[3,5,1], interleaved with arithmetic. */
Chris@42 54 T1 = Rp[0];
Chris@42 55 T2 = Rp[WS(rs, 4)];
Chris@42 56 T3 = Rm[WS(rs, 3)];
Chris@42 57 Tp = Ip[0];
Chris@42 58 Tq = Ip[WS(rs, 4)];
Chris@42 59 Tr = Im[WS(rs, 3)];
Chris@42 60 T4 = T2 + T3;
Chris@42 61 Tu = T2 - T3;
Chris@42 62 T6 = Rm[WS(rs, 5)];
Chris@42 63 TT = Tr + Tq;
Chris@42 64 Ts = Tq - Tr;
Chris@42 65 TS = FNMS(KP500000000, T4, T1);
Chris@42 66 T5 = T1 + T4;
Chris@42 67 T7 = Rm[WS(rs, 1)];
Chris@42 68 T8 = Rp[WS(rs, 2)];
Chris@42 69 T1c = Tp + Ts;
Chris@42 70 Tt = FNMS(KP500000000, Ts, Tp);
Chris@42 71 Tw = Im[WS(rs, 5)];
Chris@42 72 Tx = Im[WS(rs, 1)];
Chris@42 73 TB = T7 - T8;
Chris@42 74 T9 = T7 + T8;
Chris@42 75 Ty = Ip[WS(rs, 2)];
Chris@42 76 }
Chris@42 77 {
Chris@42 78 E T1L, Tv, Ta, TV, TW, Tz;
Chris@42 79 T1L = FNMS(KP866025403, Tu, Tt);
Chris@42 80 Tv = FMA(KP866025403, Tu, Tt);
Chris@42 81 Ta = T6 + T9;
Chris@42 82 TV = FNMS(KP500000000, T9, T6);
Chris@42 83 TW = Tx + Ty;
Chris@42 84 Tz = Tx - Ty;
Chris@42 85 {
Chris@42 86 E TC, T1M, T1C, TA, T1D;
Chris@42 87 T1C = FMA(KP866025403, TT, TS);
Chris@42 88 TU = FNMS(KP866025403, TT, TS);
Chris@42 89 T1d = Tw + Tz;
Chris@42 90 TA = FNMS(KP500000000, Tz, Tw);
Chris@42 91 T1D = FNMS(KP866025403, TW, TV);
Chris@42 92 TX = FMA(KP866025403, TW, TV);
Chris@42 93 Tb = T5 + Ta;
Chris@42 94 T1Z = T5 - Ta;
Chris@42 95 TC = FNMS(KP866025403, TB, TA);
Chris@42 96 T1M = FMA(KP866025403, TB, TA);
Chris@42 97 T2D = T1C - T1D;
Chris@42 98 T1E = T1C + T1D;
Chris@42 99 T1N = T1L - T1M;
Chris@42 100 T2y = T1L + T1M;
Chris@42 101 TD = Tv + TC;
Chris@42 102 T2t = Tv - TC;
Chris@42 103 }
Chris@42 104 }
Chris@42 105 }
Chris@42 106 {
Chris@42 107 E T12, Th, TH, TE, Tg, T11, T14, TK, T17, Tk, TL;
Chris@42 108 {
Chris@42 109 E Tc, TZ, TF, TG, Tf, Td, Te, Ti, Tj, T10;
/* Second group of 12 loads: Rp[3,1,5], Ip[3,5,1], Rm[4,0,2], Im[4,0,2], interleaved with arithmetic. */
Chris@42 110 Tc = Rp[WS(rs, 3)];
Chris@42 111 T1U = T1c + T1d;
Chris@42 112 T1e = T1c - T1d;
Chris@42 113 T2o = TU + TX;
Chris@42 114 TY = TU - TX;
Chris@42 115 Td = Rm[WS(rs, 4)];
Chris@42 116 Te = Rm[0];
Chris@42 117 TZ = Ip[WS(rs, 3)];
Chris@42 118 TF = Im[WS(rs, 4)];
Chris@42 119 TG = Im[0];
Chris@42 120 Tf = Td + Te;
Chris@42 121 T12 = Td - Te;
Chris@42 122 Th = Rm[WS(rs, 2)];
Chris@42 123 TH = TF - TG;
Chris@42 124 T10 = TF + TG;
Chris@42 125 TE = FNMS(KP500000000, Tf, Tc);
Chris@42 126 Tg = Tc + Tf;
Chris@42 127 Ti = Rp[WS(rs, 1)];
Chris@42 128 Tj = Rp[WS(rs, 5)];
Chris@42 129 T1f = TZ - T10;
Chris@42 130 T11 = FMA(KP500000000, T10, TZ);
Chris@42 131 T14 = Im[WS(rs, 2)];
Chris@42 132 TK = Ip[WS(rs, 5)];
Chris@42 133 T17 = Ti - Tj;
Chris@42 134 Tk = Ti + Tj;
Chris@42 135 TL = Ip[WS(rs, 1)];
Chris@42 136 }
Chris@42 137 {
Chris@42 138 E T1O, T13, Tl, TJ, TM, T15;
Chris@42 139 T1O = FNMS(KP866025403, T12, T11);
Chris@42 140 T13 = FMA(KP866025403, T12, T11);
Chris@42 141 Tl = Th + Tk;
Chris@42 142 TJ = FNMS(KP500000000, Tk, Th);
Chris@42 143 TM = TK - TL;
Chris@42 144 T15 = TK + TL;
Chris@42 145 {
Chris@42 146 E T18, T1P, T1F, T16, T1G;
Chris@42 147 T1F = FNMS(KP866025403, TH, TE);
Chris@42 148 TI = FMA(KP866025403, TH, TE);
Chris@42 149 T1g = T15 - T14;
Chris@42 150 T16 = FMA(KP500000000, T15, T14);
Chris@42 151 T1G = FNMS(KP866025403, TM, TJ);
Chris@42 152 TN = FMA(KP866025403, TM, TJ);
Chris@42 153 Tm = Tg + Tl;
Chris@42 154 T1V = Tg - Tl;
Chris@42 155 T18 = FNMS(KP866025403, T17, T16);
Chris@42 156 T1P = FMA(KP866025403, T17, T16);
Chris@42 157 T2z = T1F - T1G;
Chris@42 158 T1H = T1F + T1G;
Chris@42 159 T1Q = T1O - T1P;
Chris@42 160 T2E = T1O + T1P;
Chris@42 161 T19 = T13 + T18;
Chris@42 162 T2u = T13 - T18;
Chris@42 163 }
Chris@42 164 }
Chris@42 165 }
Chris@42 166 }
Chris@42 167 {
Chris@42 168 E T20, T2p, T1v, T1s, T1q, T1y, T1u, T1z, T1t;
Chris@42 169 {
Chris@42 170 E T1m, Tn, T1a, T1p, T1i, To, TP, TR, T1h, TO;
Chris@42 171 T1m = Tb - Tm;
Chris@42 172 Tn = Tb + Tm;
Chris@42 173 T20 = T1f - T1g;
Chris@42 174 T1h = T1f + T1g;
Chris@42 175 T2p = TI + TN;
Chris@42 176 TO = TI - TN;
Chris@42 177 T1a = TY - T19;
Chris@42 178 T1v = TY + T19;
Chris@42 179 T1p = T1e - T1h;
Chris@42 180 T1i = T1e + T1h;
Chris@42 181 To = W[0];
Chris@42 182 T1s = TD - TO;
Chris@42 183 TP = TD + TO;
Chris@42 184 TR = W[1];
Chris@42 185 {
Chris@42 186 E T1l, T1o, T1n, T1x, T1r;
Chris@42 187 {
Chris@42 188 E T1j, TQ, T1k, T1b;
Chris@42 189 T1j = To * T1a;
Chris@42 190 TQ = To * TP;
Chris@42 191 T1l = W[10];
Chris@42 192 T1k = FNMS(TR, TP, T1j);
Chris@42 193 T1b = FMA(TR, T1a, TQ);
Chris@42 194 T1o = W[11];
Chris@42 195 T1n = T1l * T1m;
/* Stores for index 0 (twiddles W[0], W[1]); this is the first store of the iteration. */
Chris@42 196 Im[0] = T1k - T1i;
Chris@42 197 Ip[0] = T1i + T1k;
Chris@42 198 Rm[0] = Tn + T1b;
Chris@42 199 Rp[0] = Tn - T1b;
Chris@42 200 T1x = T1o * T1m;
Chris@42 201 T1r = W[12];
Chris@42 202 }
Chris@42 203 T1q = FNMS(T1o, T1p, T1n);
Chris@42 204 T1y = FMA(T1l, T1p, T1x);
Chris@42 205 T1u = W[13];
Chris@42 206 T1z = T1r * T1v;
Chris@42 207 T1t = T1r * T1s;
Chris@42 208 }
Chris@42 209 }
Chris@42 210 {
Chris@42 211 E T2e, T2h, T1S, T2j, T2f, T26, T2c, T2m, T2g, T24, T22;
Chris@42 212 {
Chris@42 213 E T2b, T1R, T27, T2a, T1B, T29, T2l, T1K, T1J, T1W, T21, T25, T2d, T23, T1X;
Chris@42 214 E T1Y;
Chris@42 215 {
Chris@42 216 E T1I, T28, T1A, T1w, T1T;
Chris@42 217 T1A = FNMS(T1u, T1s, T1z);
Chris@42 218 T1w = FMA(T1u, T1v, T1t);
Chris@42 219 T1I = T1E - T1H;
Chris@42 220 T28 = T1E + T1H;
Chris@42 221 T2b = T1N + T1Q;
Chris@42 222 T1R = T1N - T1Q;
/* Stores for index WS(rs, 3) (twiddles W[10] .. W[13]). */
Chris@42 223 Im[WS(rs, 3)] = T1A - T1y;
Chris@42 224 Ip[WS(rs, 3)] = T1y + T1A;
Chris@42 225 Rm[WS(rs, 3)] = T1q + T1w;
Chris@42 226 Rp[WS(rs, 3)] = T1q - T1w;
Chris@42 227 T27 = W[14];
Chris@42 228 T2a = W[15];
Chris@42 229 T1B = W[2];
Chris@42 230 T29 = T27 * T28;
Chris@42 231 T2l = T2a * T28;
Chris@42 232 T1K = W[3];
Chris@42 233 T1J = T1B * T1I;
Chris@42 234 T1W = T1U - T1V;
Chris@42 235 T2e = T1V + T1U;
Chris@42 236 T2h = T1Z - T20;
Chris@42 237 T21 = T1Z + T20;
Chris@42 238 T25 = T1K * T1I;
Chris@42 239 T1T = W[4];
Chris@42 240 T2d = W[16];
Chris@42 241 T23 = T1T * T21;
Chris@42 242 T1X = T1T * T1W;
Chris@42 243 }
Chris@42 244 T1S = FNMS(T1K, T1R, T1J);
Chris@42 245 T2j = T2d * T2h;
Chris@42 246 T2f = T2d * T2e;
Chris@42 247 T26 = FMA(T1B, T1R, T25);
Chris@42 248 T1Y = W[5];
Chris@42 249 T2c = FNMS(T2a, T2b, T29);
Chris@42 250 T2m = FMA(T27, T2b, T2l);
Chris@42 251 T2g = W[17];
Chris@42 252 T24 = FNMS(T1Y, T1W, T23);
Chris@42 253 T22 = FMA(T1Y, T21, T1X);
Chris@42 254 }
Chris@42 255 {
Chris@42 256 E T2L, T2O, T2P, T2v, T2N, T2X, T2n, T2s, T2A, T2F, T2r, T2H, T2R, T2J, T2B;
Chris@42 257 E T2C;
Chris@42 258 {
Chris@42 259 E T2q, T2k, T2i, T2M, T2x;
Chris@42 260 T2k = FNMS(T2g, T2e, T2j);
Chris@42 261 T2i = FMA(T2g, T2h, T2f);
/* Stores for index WS(rs, 1) (twiddles W[2] .. W[5]) and WS(rs, 4) (twiddles W[14] .. W[17]). */
Chris@42 262 Im[WS(rs, 1)] = T24 - T26;
Chris@42 263 Ip[WS(rs, 1)] = T24 + T26;
Chris@42 264 Rm[WS(rs, 1)] = T22 + T1S;
Chris@42 265 Rp[WS(rs, 1)] = T1S - T22;
Chris@42 266 Im[WS(rs, 4)] = T2k - T2m;
Chris@42 267 Ip[WS(rs, 4)] = T2k + T2m;
Chris@42 268 Rm[WS(rs, 4)] = T2i + T2c;
Chris@42 269 Rp[WS(rs, 4)] = T2c - T2i;
Chris@42 270 T2q = T2o + T2p;
Chris@42 271 T2M = T2o - T2p;
Chris@42 272 T2L = W[18];
Chris@42 273 T2O = W[19];
Chris@42 274 T2P = T2t - T2u;
Chris@42 275 T2v = T2t + T2u;
Chris@42 276 T2N = T2L * T2M;
Chris@42 277 T2X = T2O * T2M;
Chris@42 278 T2n = W[6];
Chris@42 279 T2s = W[7];
Chris@42 280 T2S = T2y - T2z;
Chris@42 281 T2A = T2y + T2z;
Chris@42 282 T2F = T2D - T2E;
Chris@42 283 T2V = T2D + T2E;
Chris@42 284 T2r = T2n * T2q;
Chris@42 285 T2H = T2s * T2q;
Chris@42 286 T2x = W[8];
Chris@42 287 T2R = W[20];
Chris@42 288 T2J = T2x * T2F;
Chris@42 289 T2B = T2x * T2A;
Chris@42 290 }
Chris@42 291 T2w = FNMS(T2s, T2v, T2r);
Chris@42 292 T2Z = T2R * T2V;
Chris@42 293 T2T = T2R * T2S;
Chris@42 294 T2I = FMA(T2n, T2v, T2H);
Chris@42 295 T2C = W[9];
Chris@42 296 T2Q = FNMS(T2O, T2P, T2N);
Chris@42 297 T2Y = FMA(T2L, T2P, T2X);
Chris@42 298 T2U = W[21];
Chris@42 299 T2K = FNMS(T2C, T2A, T2J);
Chris@42 300 T2G = FMA(T2C, T2F, T2B);
Chris@42 301 }
Chris@42 302 }
Chris@42 303 }
Chris@42 304 }
Chris@42 305 T30 = FNMS(T2U, T2S, T2Z);
Chris@42 306 T2W = FMA(T2U, T2V, T2T);
/* Stores for index WS(rs, 2) (twiddles W[6] .. W[9]) and WS(rs, 5) (twiddles W[18] .. W[21]). */
Chris@42 307 Im[WS(rs, 2)] = T2K - T2I;
Chris@42 308 Ip[WS(rs, 2)] = T2I + T2K;
Chris@42 309 Rm[WS(rs, 2)] = T2w + T2G;
Chris@42 310 Rp[WS(rs, 2)] = T2w - T2G;
Chris@42 311 Im[WS(rs, 5)] = T30 - T2Y;
Chris@42 312 Ip[WS(rs, 5)] = T2Y + T30;
Chris@42 313 Rm[WS(rs, 5)] = T2Q + T2W;
Chris@42 314 Rp[WS(rs, 5)] = T2Q - T2W;
Chris@42 315 }
Chris@42 316 }
Chris@42 317 }
Chris@42 318
/*
 * Twiddle instruction table referenced by the codelet descriptor.
 * tw_instr, TW_FULL and TW_NEXT are defined in headers not shown here.
 * NOTE(review): presumably requests the full twiddle set for size 12
 * (the codelet consumes 22 values of W per iteration) -- confirm against
 * the tw_instr definition.
 */
Chris@42 319 static const tw_instr twinstr[] = {
Chris@42 320 {TW_FULL, 1, 12},
Chris@42 321 {TW_NEXT, 1, 0}
Chris@42 322 };
Chris@42 323
/*
 * Codelet descriptor for the HAVE_FMA variant: 12 (the -n value on the
 * generator command line), the codelet name string, the twiddle table,
 * &GENUS, and an operation-count tuple.  The first three counts
 * {96, 22, 46} match the generated summary comment (96 additions,
 * 22 multiplications, 46 fused multiply/adds); the meaning of the
 * trailing 0 is not visible in this file.
 */
Chris@42 324 static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {96, 22, 46, 0} };
Chris@42 325
/*
 * Registration entry point (HAVE_FMA variant): passes the codelet
 * function and its descriptor to X(khc2c_register) for planner p,
 * tagged HC2C_VIA_DFT.  X() is a macro defined elsewhere
 * (NOTE(review): presumably applies the library's symbol prefix).
 */
Chris@42 326 void X(codelet_hc2cbdft_12) (planner *p) {
Chris@42 327 X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT);
Chris@42 328 }
Chris@42 329 #else /* HAVE_FMA */
Chris@42 330
Chris@42 331 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include hc2cb.h */
Chris@42 332
Chris@42 333 /*
Chris@42 334 * This function contains 142 FP additions, 60 FP multiplications,
Chris@42 335 * (or, 112 additions, 30 multiplications, 30 fused multiply/add),
Chris@42 336 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@42 337 */
Chris@42 338 #include "hc2cb.h"
Chris@42 339
/*
 * hc2cbdft_12 -- variant compiled when HAVE_FMA is not defined.
 *
 * Machine-generated size-12 codelet (see the genfft command line in the
 * comment above).  For every m in [mb, me) it
 *   - loads 24 values: Rp, Ip, Rm and Im at WS(rs, 0) .. WS(rs, 5);
 *   - combines them with the 22 twiddle values W[0] .. W[21];
 *   - stores 24 results back to the same 24 locations.
 * All loads of an iteration are issued before its first store.
 *
 * Parameters:
 *   Rp, Ip - arrays stepped by +ms after each iteration
 *   Rm, Im - arrays stepped by -ms after each iteration
 *   W      - twiddle table; offset once by (mb - 1) * 22, then stepped
 *            by 22 after each iteration
 *   rs     - stride passed to WS() to address the six elements per array
 *   mb, me - half-open iteration range
 *   ms     - pointer step between consecutive iterations
 *
 * R, E, INT, stride, DK, WS, FMA, FMS, FNMS and MAKE_VOLATILE_STRIDE are
 * provided by the included headers and are not defined in this file.
 */
Chris@42 340 static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 341 {
/* Numeric constants: 1/2, and 0.8660254... which equals sqrt(3)/2 to the printed precision. */
Chris@42 342 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 343 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 344 {
Chris@42 345 INT m;
/* One pass per m; the increment clause steps all four data pointers and the twiddle pointer. */
Chris@42 346 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
Chris@42 347 E Tv, T1E, TC, T1F, TW, T1x, TT, T1w, T1d, T1N, Tb, T1R, TI, T1z, TN;
Chris@42 348 E T1A, T17, T1I, T12, T1H, T1g, T1S, Tm, T1O;
Chris@42 349 {
Chris@42 350 E T1, Tq, T6, TA, T4, Tp, Tt, TS, T9, Tw, Tz, TV;
/* First group of 12 loads: Rp[0,4,2], Ip[0,4,2], Rm[5,3,1], Im[5,3,1], followed by their partial sums. */
Chris@42 351 T1 = Rp[0];
Chris@42 352 Tq = Ip[0];
Chris@42 353 T6 = Rm[WS(rs, 5)];
Chris@42 354 TA = Im[WS(rs, 5)];
Chris@42 355 {
Chris@42 356 E T2, T3, Tr, Ts;
Chris@42 357 T2 = Rp[WS(rs, 4)];
Chris@42 358 T3 = Rm[WS(rs, 3)];
Chris@42 359 T4 = T2 + T3;
Chris@42 360 Tp = KP866025403 * (T2 - T3);
Chris@42 361 Tr = Im[WS(rs, 3)];
Chris@42 362 Ts = Ip[WS(rs, 4)];
Chris@42 363 Tt = Tr - Ts;
Chris@42 364 TS = KP866025403 * (Tr + Ts);
Chris@42 365 }
Chris@42 366 {
Chris@42 367 E T7, T8, Tx, Ty;
Chris@42 368 T7 = Rm[WS(rs, 1)];
Chris@42 369 T8 = Rp[WS(rs, 2)];
Chris@42 370 T9 = T7 + T8;
Chris@42 371 Tw = KP866025403 * (T7 - T8);
Chris@42 372 Tx = Im[WS(rs, 1)];
Chris@42 373 Ty = Ip[WS(rs, 2)];
Chris@42 374 Tz = Tx - Ty;
Chris@42 375 TV = KP866025403 * (Tx + Ty);
Chris@42 376 }
Chris@42 377 {
Chris@42 378 E Tu, TB, TU, TR;
Chris@42 379 Tu = FMA(KP500000000, Tt, Tq);
Chris@42 380 Tv = Tp + Tu;
Chris@42 381 T1E = Tu - Tp;
Chris@42 382 TB = FMS(KP500000000, Tz, TA);
Chris@42 383 TC = Tw + TB;
Chris@42 384 T1F = TB - Tw;
Chris@42 385 TU = FNMS(KP500000000, T9, T6);
Chris@42 386 TW = TU + TV;
Chris@42 387 T1x = TU - TV;
Chris@42 388 TR = FNMS(KP500000000, T4, T1);
Chris@42 389 TT = TR - TS;
Chris@42 390 T1w = TR + TS;
Chris@42 391 {
Chris@42 392 E T1b, T1c, T5, Ta;
Chris@42 393 T1b = Tq - Tt;
Chris@42 394 T1c = Tz + TA;
Chris@42 395 T1d = T1b - T1c;
Chris@42 396 T1N = T1b + T1c;
Chris@42 397 T5 = T1 + T4;
Chris@42 398 Ta = T6 + T9;
Chris@42 399 Tb = T5 + Ta;
Chris@42 400 T1R = T5 - Ta;
Chris@42 401 }
Chris@42 402 }
Chris@42 403 }
Chris@42 404 {
Chris@42 405 E Tc, T10, Th, T15, Tf, TY, TH, TZ, Tk, T13, TM, T14;
/* Second group of 12 loads: Rp[3,1,5], Ip[3,5,1], Rm[2,4,0], Im[2,4,0], followed by their partial sums. */
Chris@42 406 Tc = Rp[WS(rs, 3)];
Chris@42 407 T10 = Ip[WS(rs, 3)];
Chris@42 408 Th = Rm[WS(rs, 2)];
Chris@42 409 T15 = Im[WS(rs, 2)];
Chris@42 410 {
Chris@42 411 E Td, Te, TF, TG;
Chris@42 412 Td = Rm[WS(rs, 4)];
Chris@42 413 Te = Rm[0];
Chris@42 414 Tf = Td + Te;
Chris@42 415 TY = KP866025403 * (Td - Te);
Chris@42 416 TF = Im[WS(rs, 4)];
Chris@42 417 TG = Im[0];
Chris@42 418 TH = KP866025403 * (TF - TG);
Chris@42 419 TZ = TF + TG;
Chris@42 420 }
Chris@42 421 {
Chris@42 422 E Ti, Tj, TK, TL;
Chris@42 423 Ti = Rp[WS(rs, 1)];
Chris@42 424 Tj = Rp[WS(rs, 5)];
Chris@42 425 Tk = Ti + Tj;
Chris@42 426 T13 = KP866025403 * (Ti - Tj);
Chris@42 427 TK = Ip[WS(rs, 5)];
Chris@42 428 TL = Ip[WS(rs, 1)];
Chris@42 429 TM = KP866025403 * (TK - TL);
Chris@42 430 T14 = TK + TL;
Chris@42 431 }
Chris@42 432 {
Chris@42 433 E TE, TJ, T16, T11;
Chris@42 434 TE = FNMS(KP500000000, Tf, Tc);
Chris@42 435 TI = TE + TH;
Chris@42 436 T1z = TE - TH;
Chris@42 437 TJ = FNMS(KP500000000, Tk, Th);
Chris@42 438 TN = TJ + TM;
Chris@42 439 T1A = TJ - TM;
Chris@42 440 T16 = FMA(KP500000000, T14, T15);
Chris@42 441 T17 = T13 - T16;
Chris@42 442 T1I = T13 + T16;
Chris@42 443 T11 = FMA(KP500000000, TZ, T10);
Chris@42 444 T12 = TY + T11;
Chris@42 445 T1H = T11 - TY;
Chris@42 446 {
Chris@42 447 E T1e, T1f, Tg, Tl;
Chris@42 448 T1e = T10 - TZ;
Chris@42 449 T1f = T14 - T15;
Chris@42 450 T1g = T1e + T1f;
Chris@42 451 T1S = T1e - T1f;
Chris@42 452 Tg = Tc + Tf;
Chris@42 453 Tl = Th + Tk;
Chris@42 454 Tm = Tg + Tl;
Chris@42 455 T1O = Tg - Tl;
Chris@42 456 }
Chris@42 457 }
Chris@42 458 }
Chris@42 459 {
Chris@42 460 E Tn, T1h, TP, T1p, T19, T1r, T1n, T1t;
Chris@42 461 Tn = Tb + Tm;
Chris@42 462 T1h = T1d + T1g;
Chris@42 463 {
Chris@42 464 E TD, TO, TX, T18;
Chris@42 465 TD = Tv - TC;
Chris@42 466 TO = TI - TN;
Chris@42 467 TP = TD + TO;
Chris@42 468 T1p = TD - TO;
Chris@42 469 TX = TT - TW;
Chris@42 470 T18 = T12 - T17;
Chris@42 471 T19 = TX - T18;
Chris@42 472 T1r = TX + T18;
Chris@42 473 {
Chris@42 474 E T1k, T1m, T1j, T1l;
Chris@42 475 T1k = Tb - Tm;
Chris@42 476 T1m = T1d - T1g;
Chris@42 477 T1j = W[10];
Chris@42 478 T1l = W[11];
Chris@42 479 T1n = FNMS(T1l, T1m, T1j * T1k);
Chris@42 480 T1t = FMA(T1l, T1k, T1j * T1m);
Chris@42 481 }
Chris@42 482 }
Chris@42 483 {
Chris@42 484 E T1a, T1i, To, TQ;
Chris@42 485 To = W[0];
Chris@42 486 TQ = W[1];
Chris@42 487 T1a = FMA(To, TP, TQ * T19);
Chris@42 488 T1i = FNMS(TQ, TP, To * T19);
/* Stores for index 0 (twiddles W[0], W[1]); this is the first store of the iteration. */
Chris@42 489 Rp[0] = Tn - T1a;
Chris@42 490 Ip[0] = T1h + T1i;
Chris@42 491 Rm[0] = Tn + T1a;
Chris@42 492 Im[0] = T1i - T1h;
Chris@42 493 }
Chris@42 494 {
Chris@42 495 E T1s, T1u, T1o, T1q;
Chris@42 496 T1o = W[12];
Chris@42 497 T1q = W[13];
Chris@42 498 T1s = FMA(T1o, T1p, T1q * T1r);
Chris@42 499 T1u = FNMS(T1q, T1p, T1o * T1r);
/* Stores for index WS(rs, 3) (twiddles W[10] .. W[13]). */
Chris@42 500 Rp[WS(rs, 3)] = T1n - T1s;
Chris@42 501 Ip[WS(rs, 3)] = T1t + T1u;
Chris@42 502 Rm[WS(rs, 3)] = T1n + T1s;
Chris@42 503 Im[WS(rs, 3)] = T1u - T1t;
Chris@42 504 }
Chris@42 505 }
Chris@42 506 {
Chris@42 507 E T1C, T1Y, T1K, T20, T1U, T1V, T26, T27;
Chris@42 508 {
Chris@42 509 E T1y, T1B, T1G, T1J;
Chris@42 510 T1y = T1w + T1x;
Chris@42 511 T1B = T1z + T1A;
Chris@42 512 T1C = T1y - T1B;
Chris@42 513 T1Y = T1y + T1B;
Chris@42 514 T1G = T1E + T1F;
Chris@42 515 T1J = T1H - T1I;
Chris@42 516 T1K = T1G - T1J;
Chris@42 517 T20 = T1G + T1J;
Chris@42 518 }
Chris@42 519 {
Chris@42 520 E T1P, T1T, T1M, T1Q;
Chris@42 521 T1P = T1N - T1O;
Chris@42 522 T1T = T1R + T1S;
Chris@42 523 T1M = W[4];
Chris@42 524 T1Q = W[5];
Chris@42 525 T1U = FMA(T1M, T1P, T1Q * T1T);
Chris@42 526 T1V = FNMS(T1Q, T1P, T1M * T1T);
Chris@42 527 }
Chris@42 528 {
Chris@42 529 E T23, T25, T22, T24;
Chris@42 530 T23 = T1O + T1N;
Chris@42 531 T25 = T1R - T1S;
Chris@42 532 T22 = W[16];
Chris@42 533 T24 = W[17];
Chris@42 534 T26 = FMA(T22, T23, T24 * T25);
Chris@42 535 T27 = FNMS(T24, T23, T22 * T25);
Chris@42 536 }
Chris@42 537 {
Chris@42 538 E T1L, T1W, T1v, T1D;
Chris@42 539 T1v = W[2];
Chris@42 540 T1D = W[3];
Chris@42 541 T1L = FNMS(T1D, T1K, T1v * T1C);
Chris@42 542 T1W = FMA(T1D, T1C, T1v * T1K);
/* Stores for index WS(rs, 1) (twiddles W[2] .. W[5]). */
Chris@42 543 Rp[WS(rs, 1)] = T1L - T1U;
Chris@42 544 Ip[WS(rs, 1)] = T1V + T1W;
Chris@42 545 Rm[WS(rs, 1)] = T1U + T1L;
Chris@42 546 Im[WS(rs, 1)] = T1V - T1W;
Chris@42 547 }
Chris@42 548 {
Chris@42 549 E T21, T28, T1X, T1Z;
Chris@42 550 T1X = W[14];
Chris@42 551 T1Z = W[15];
Chris@42 552 T21 = FNMS(T1Z, T20, T1X * T1Y);
Chris@42 553 T28 = FMA(T1Z, T1Y, T1X * T20);
/* Stores for index WS(rs, 4) (twiddles W[14] .. W[17]). */
Chris@42 554 Rp[WS(rs, 4)] = T21 - T26;
Chris@42 555 Ip[WS(rs, 4)] = T27 + T28;
Chris@42 556 Rm[WS(rs, 4)] = T26 + T21;
Chris@42 557 Im[WS(rs, 4)] = T27 - T28;
Chris@42 558 }
Chris@42 559 }
Chris@42 560 {
Chris@42 561 E T2c, T2u, T2p, T2B, T2g, T2w, T2l, T2z;
Chris@42 562 {
Chris@42 563 E T2a, T2b, T2n, T2o;
Chris@42 564 T2a = TT + TW;
Chris@42 565 T2b = TI + TN;
Chris@42 566 T2c = T2a + T2b;
Chris@42 567 T2u = T2a - T2b;
Chris@42 568 T2n = T1w - T1x;
Chris@42 569 T2o = T1H + T1I;
Chris@42 570 T2p = T2n - T2o;
Chris@42 571 T2B = T2n + T2o;
Chris@42 572 }
Chris@42 573 {
Chris@42 574 E T2e, T2f, T2j, T2k;
Chris@42 575 T2e = Tv + TC;
Chris@42 576 T2f = T12 + T17;
Chris@42 577 T2g = T2e + T2f;
Chris@42 578 T2w = T2e - T2f;
Chris@42 579 T2j = T1E - T1F;
Chris@42 580 T2k = T1z - T1A;
Chris@42 581 T2l = T2j + T2k;
Chris@42 582 T2z = T2j - T2k;
Chris@42 583 }
Chris@42 584 {
Chris@42 585 E T2h, T2r, T2q, T2s;
Chris@42 586 {
Chris@42 587 E T29, T2d, T2i, T2m;
Chris@42 588 T29 = W[6];
Chris@42 589 T2d = W[7];
Chris@42 590 T2h = FNMS(T2d, T2g, T29 * T2c);
Chris@42 591 T2r = FMA(T2d, T2c, T29 * T2g);
Chris@42 592 T2i = W[8];
Chris@42 593 T2m = W[9];
Chris@42 594 T2q = FMA(T2i, T2l, T2m * T2p);
Chris@42 595 T2s = FNMS(T2m, T2l, T2i * T2p);
Chris@42 596 }
/* Stores for index WS(rs, 2) (twiddles W[6] .. W[9]). */
Chris@42 597 Rp[WS(rs, 2)] = T2h - T2q;
Chris@42 598 Ip[WS(rs, 2)] = T2r + T2s;
Chris@42 599 Rm[WS(rs, 2)] = T2h + T2q;
Chris@42 600 Im[WS(rs, 2)] = T2s - T2r;
Chris@42 601 }
Chris@42 602 {
Chris@42 603 E T2x, T2D, T2C, T2E;
Chris@42 604 {
Chris@42 605 E T2t, T2v, T2y, T2A;
Chris@42 606 T2t = W[18];
Chris@42 607 T2v = W[19];
Chris@42 608 T2x = FNMS(T2v, T2w, T2t * T2u);
Chris@42 609 T2D = FMA(T2v, T2u, T2t * T2w);
Chris@42 610 T2y = W[20];
Chris@42 611 T2A = W[21];
Chris@42 612 T2C = FMA(T2y, T2z, T2A * T2B);
Chris@42 613 T2E = FNMS(T2A, T2z, T2y * T2B);
Chris@42 614 }
/* Stores for index WS(rs, 5) (twiddles W[18] .. W[21]). */
Chris@42 615 Rp[WS(rs, 5)] = T2x - T2C;
Chris@42 616 Ip[WS(rs, 5)] = T2D + T2E;
Chris@42 617 Rm[WS(rs, 5)] = T2x + T2C;
Chris@42 618 Im[WS(rs, 5)] = T2E - T2D;
Chris@42 619 }
Chris@42 620 }
Chris@42 621 }
Chris@42 622 }
Chris@42 623 }
Chris@42 624
/*
 * Twiddle instruction table referenced by the codelet descriptor.
 * tw_instr, TW_FULL and TW_NEXT are defined in headers not shown here.
 * NOTE(review): presumably requests the full twiddle set for size 12
 * (the codelet consumes 22 values of W per iteration) -- confirm against
 * the tw_instr definition.
 */
Chris@42 625 static const tw_instr twinstr[] = {
Chris@42 626 {TW_FULL, 1, 12},
Chris@42 627 {TW_NEXT, 1, 0}
Chris@42 628 };
Chris@42 629
/*
 * Codelet descriptor for the non-FMA variant: 12 (the -n value on the
 * generator command line), the codelet name string, the twiddle table,
 * &GENUS, and an operation-count tuple.  The first three counts
 * {112, 30, 30} match the generated summary comment (112 additions,
 * 30 multiplications, 30 fused multiply/adds); the meaning of the
 * trailing 0 is not visible in this file.
 */
Chris@42 630 static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {112, 30, 30, 0} };
Chris@42 631
/*
 * Registration entry point (non-FMA variant): passes the codelet
 * function and its descriptor to X(khc2c_register) for planner p,
 * tagged HC2C_VIA_DFT.  X() is a macro defined elsewhere
 * (NOTE(review): presumably applies the library's symbol prefix).
 */
Chris@42 632 void X(codelet_hc2cbdft_12) (planner *p) {
Chris@42 633 X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT);
Chris@42 634 }
Chris@42 635 #endif /* HAVE_FMA */