annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cbdft_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:58 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 142 FP additions, 68 FP multiplications,
Chris@82 32 * (or, 96 additions, 22 multiplications, 46 fused multiply/add),
Chris@82 33 * 55 stack variables, 2 constants, and 48 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cbdft_12, FMA variant (compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).  Generated code: change the
 * generator invocation quoted above rather than editing this body.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  data arrays, read and then overwritten in place at
 *                   offsets WS(rs, 0) .. WS(rs, 5) on every iteration
 *   W               coefficient table; 22 values (W[0] .. W[21]) are used
 *                   per iteration, starting at W + (mb - 1) * 22
 *   rs              stride handed to WS() to address the six elements
 *   mb, me          the loop runs for m = mb, mb + 1, ..., me - 1
 *   ms              per-iteration step: Rp and Ip advance by +ms,
 *                   Rm and Im advance by -ms
 *
 * All 24 inputs of an iteration are loaded before the first store, so the
 * in-place overwrite does not disturb values still needed in that iteration.
 * NOTE(review): FMA/FNMS/DK/WS/MAKE_VOLATILE_STRIDE come from the included
 * headers; their exact definitions are not visible in this file.
 */
Chris@82 37 static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* KP866025403 is numerically sqrt(3)/2; KP500000000 is 1/2. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
/* Intermediates that stay live from the load phase to the store phase. */
Chris@82 44 E Tv, TC, TD, T1L, T1M, T2y, Tb, T1Z, T1E, T2D, T1e, T1U, TY, T2o, T13;
Chris@82 45 E T18, T19, T1O, T1P, T2E, Tm, T1V, T1H, T2z, T1h, T20, TO, T2p;
/*
 * First input group: Rp[0], Rp[4], Rp[2], Rm[3], Rm[5], Rm[1] and
 * Ip[0], Ip[4], Ip[2], Im[3], Im[5], Im[1] (indices are WS(rs, k)),
 * combined into sums and differences scaled by the two constants.
 */
Chris@82 46 {
Chris@82 47 E T1, T4, Tu, TS, Tp, Ts, Tt, TT, T6, T9, TB, TV, Tw, Tz, TA;
Chris@82 48 E TW;
Chris@82 49 {
Chris@82 50 E T2, T3, Tq, Tr;
Chris@82 51 T1 = Rp[0];
Chris@82 52 T2 = Rp[WS(rs, 4)];
Chris@82 53 T3 = Rm[WS(rs, 3)];
Chris@82 54 T4 = T2 + T3;
Chris@82 55 Tu = T2 - T3;
Chris@82 56 TS = FNMS(KP500000000, T4, T1);
Chris@82 57 Tp = Ip[0];
Chris@82 58 Tq = Ip[WS(rs, 4)];
Chris@82 59 Tr = Im[WS(rs, 3)];
Chris@82 60 Ts = Tq - Tr;
Chris@82 61 Tt = FNMS(KP500000000, Ts, Tp);
Chris@82 62 TT = Tr + Tq;
Chris@82 63 }
Chris@82 64 {
Chris@82 65 E T7, T8, Tx, Ty;
Chris@82 66 T6 = Rm[WS(rs, 5)];
Chris@82 67 T7 = Rm[WS(rs, 1)];
Chris@82 68 T8 = Rp[WS(rs, 2)];
Chris@82 69 T9 = T7 + T8;
Chris@82 70 TB = T7 - T8;
Chris@82 71 TV = FNMS(KP500000000, T9, T6);
Chris@82 72 Tw = Im[WS(rs, 5)];
Chris@82 73 Tx = Im[WS(rs, 1)];
Chris@82 74 Ty = Ip[WS(rs, 2)];
Chris@82 75 Tz = Tx - Ty;
Chris@82 76 TA = FNMS(KP500000000, Tz, Tw);
Chris@82 77 TW = Tx + Ty;
Chris@82 78 }
Chris@82 79 {
Chris@82 80 E T5, Ta, T1C, T1D;
Chris@82 81 Tv = FMA(KP866025403, Tu, Tt);
Chris@82 82 TC = FNMS(KP866025403, TB, TA);
Chris@82 83 TD = Tv + TC;
Chris@82 84 T1L = FNMS(KP866025403, Tu, Tt);
Chris@82 85 T1M = FMA(KP866025403, TB, TA);
Chris@82 86 T2y = T1L + T1M;
Chris@82 87 T5 = T1 + T4;
Chris@82 88 Ta = T6 + T9;
Chris@82 89 Tb = T5 + Ta;
Chris@82 90 T1Z = T5 - Ta;
Chris@82 91 T1C = FMA(KP866025403, TT, TS);
Chris@82 92 T1D = FNMS(KP866025403, TW, TV);
Chris@82 93 T1E = T1C + T1D;
Chris@82 94 T2D = T1C - T1D;
Chris@82 95 {
Chris@82 96 E T1c, T1d, TU, TX;
Chris@82 97 T1c = Tp + Ts;
Chris@82 98 T1d = Tw + Tz;
Chris@82 99 T1e = T1c - T1d;
Chris@82 100 T1U = T1c + T1d;
Chris@82 101 TU = FNMS(KP866025403, TT, TS);
Chris@82 102 TX = FMA(KP866025403, TW, TV);
Chris@82 103 TY = TU - TX;
Chris@82 104 T2o = TU + TX;
Chris@82 105 }
Chris@82 106 }
Chris@82 107 }
/*
 * Second input group: Rp[3], Rp[1], Rp[5], Rm[4], Rm[0], Rm[2] and
 * Ip[3], Ip[5], Ip[1], Im[4], Im[0], Im[2], combined the same way.
 */
Chris@82 108 {
Chris@82 109 E Tc, Tf, TE, T12, TZ, T10, TH, T11, Th, Tk, TJ, T17, T14, T15, TM;
Chris@82 110 E T16;
Chris@82 111 {
Chris@82 112 E Td, Te, TF, TG;
Chris@82 113 Tc = Rp[WS(rs, 3)];
Chris@82 114 Td = Rm[WS(rs, 4)];
Chris@82 115 Te = Rm[0];
Chris@82 116 Tf = Td + Te;
Chris@82 117 TE = FNMS(KP500000000, Tf, Tc);
Chris@82 118 T12 = Td - Te;
Chris@82 119 TZ = Ip[WS(rs, 3)];
Chris@82 120 TF = Im[WS(rs, 4)];
Chris@82 121 TG = Im[0];
Chris@82 122 T10 = TF + TG;
Chris@82 123 TH = TF - TG;
Chris@82 124 T11 = FMA(KP500000000, T10, TZ);
Chris@82 125 }
Chris@82 126 {
Chris@82 127 E Ti, Tj, TK, TL;
Chris@82 128 Th = Rm[WS(rs, 2)];
Chris@82 129 Ti = Rp[WS(rs, 1)];
Chris@82 130 Tj = Rp[WS(rs, 5)];
Chris@82 131 Tk = Ti + Tj;
Chris@82 132 TJ = FNMS(KP500000000, Tk, Th);
Chris@82 133 T17 = Ti - Tj;
Chris@82 134 T14 = Im[WS(rs, 2)];
Chris@82 135 TK = Ip[WS(rs, 5)];
Chris@82 136 TL = Ip[WS(rs, 1)];
Chris@82 137 T15 = TK + TL;
Chris@82 138 TM = TK - TL;
Chris@82 139 T16 = FMA(KP500000000, T15, T14);
Chris@82 140 }
Chris@82 141 {
Chris@82 142 E Tg, Tl, T1F, T1G;
Chris@82 143 T13 = FMA(KP866025403, T12, T11);
Chris@82 144 T18 = FNMS(KP866025403, T17, T16);
Chris@82 145 T19 = T13 + T18;
Chris@82 146 T1O = FNMS(KP866025403, T12, T11);
Chris@82 147 T1P = FMA(KP866025403, T17, T16);
Chris@82 148 T2E = T1O + T1P;
Chris@82 149 Tg = Tc + Tf;
Chris@82 150 Tl = Th + Tk;
Chris@82 151 Tm = Tg + Tl;
Chris@82 152 T1V = Tg - Tl;
Chris@82 153 T1F = FNMS(KP866025403, TH, TE);
Chris@82 154 T1G = FNMS(KP866025403, TM, TJ);
Chris@82 155 T1H = T1F + T1G;
Chris@82 156 T2z = T1F - T1G;
Chris@82 157 {
Chris@82 158 E T1f, T1g, TI, TN;
Chris@82 159 T1f = TZ - T10;
Chris@82 160 T1g = T15 - T14;
Chris@82 161 T1h = T1f + T1g;
Chris@82 162 T20 = T1f - T1g;
Chris@82 163 TI = FMA(KP866025403, TH, TE);
Chris@82 164 TN = FMA(KP866025403, TM, TJ);
Chris@82 165 TO = TI - TN;
Chris@82 166 T2p = TI + TN;
Chris@82 167 }
Chris@82 168 }
Chris@82 169 }
/* Output slot 0: uses W[0] and W[1]; stores Rp[0], Ip[0], Rm[0], Im[0]. */
Chris@82 170 {
Chris@82 171 E Tn, T1i, TP, T1a, TQ, T1j, To, T1b, T1k, TR;
Chris@82 172 Tn = Tb + Tm;
Chris@82 173 T1i = T1e + T1h;
Chris@82 174 TP = TD + TO;
Chris@82 175 T1a = TY - T19;
Chris@82 176 To = W[0];
Chris@82 177 TQ = To * TP;
Chris@82 178 T1j = To * T1a;
Chris@82 179 TR = W[1];
Chris@82 180 T1b = FMA(TR, T1a, TQ);
Chris@82 181 T1k = FNMS(TR, TP, T1j);
Chris@82 182 Rp[0] = Tn - T1b;
Chris@82 183 Ip[0] = T1i + T1k;
Chris@82 184 Rm[0] = Tn + T1b;
Chris@82 185 Im[0] = T1k - T1i;
Chris@82 186 }
/* Output slot 3: uses W[10] .. W[13]; stores the four arrays at WS(rs, 3). */
Chris@82 187 {
Chris@82 188 E T1p, T1l, T1n, T1o, T1x, T1s, T1v, T1t, T1z, T1m, T1r;
Chris@82 189 T1p = T1e - T1h;
Chris@82 190 T1m = Tb - Tm;
Chris@82 191 T1l = W[10];
Chris@82 192 T1n = T1l * T1m;
Chris@82 193 T1o = W[11];
Chris@82 194 T1x = T1o * T1m;
Chris@82 195 T1s = TD - TO;
Chris@82 196 T1v = TY + T19;
Chris@82 197 T1r = W[12];
Chris@82 198 T1t = T1r * T1s;
Chris@82 199 T1z = T1r * T1v;
Chris@82 200 {
Chris@82 201 E T1q, T1y, T1w, T1A, T1u;
Chris@82 202 T1q = FNMS(T1o, T1p, T1n);
Chris@82 203 T1y = FMA(T1l, T1p, T1x);
Chris@82 204 T1u = W[13];
Chris@82 205 T1w = FMA(T1u, T1v, T1t);
Chris@82 206 T1A = FNMS(T1u, T1s, T1z);
Chris@82 207 Rp[WS(rs, 3)] = T1q - T1w;
Chris@82 208 Ip[WS(rs, 3)] = T1y + T1A;
Chris@82 209 Rm[WS(rs, 3)] = T1q + T1w;
Chris@82 210 Im[WS(rs, 3)] = T1A - T1y;
Chris@82 211 }
Chris@82 212 }
/*
 * Output slots 1 and 4: slot 1 uses W[2] .. W[5], slot 4 uses
 * W[14] .. W[17]; stores the four arrays at WS(rs, 1) and WS(rs, 4).
 */
Chris@82 213 {
Chris@82 214 E T1R, T2b, T27, T29, T2a, T2l, T1B, T1J, T1K, T25, T1W, T21, T1X, T23, T2e;
Chris@82 215 E T2h, T2f, T2j;
Chris@82 216 {
Chris@82 217 E T1N, T1Q, T28, T1I, T1T, T2d;
Chris@82 218 T1N = T1L - T1M;
Chris@82 219 T1Q = T1O - T1P;
Chris@82 220 T1R = T1N - T1Q;
Chris@82 221 T2b = T1N + T1Q;
Chris@82 222 T28 = T1E + T1H;
Chris@82 223 T27 = W[14];
Chris@82 224 T29 = T27 * T28;
Chris@82 225 T2a = W[15];
Chris@82 226 T2l = T2a * T28;
Chris@82 227 T1I = T1E - T1H;
Chris@82 228 T1B = W[2];
Chris@82 229 T1J = T1B * T1I;
Chris@82 230 T1K = W[3];
Chris@82 231 T25 = T1K * T1I;
Chris@82 232 T1W = T1U - T1V;
Chris@82 233 T21 = T1Z + T20;
Chris@82 234 T1T = W[4];
Chris@82 235 T1X = T1T * T1W;
Chris@82 236 T23 = T1T * T21;
Chris@82 237 T2e = T1V + T1U;
Chris@82 238 T2h = T1Z - T20;
Chris@82 239 T2d = W[16];
Chris@82 240 T2f = T2d * T2e;
Chris@82 241 T2j = T2d * T2h;
Chris@82 242 }
Chris@82 243 {
Chris@82 244 E T1S, T26, T22, T24, T1Y;
Chris@82 245 T1S = FNMS(T1K, T1R, T1J);
Chris@82 246 T26 = FMA(T1B, T1R, T25);
Chris@82 247 T1Y = W[5];
Chris@82 248 T22 = FMA(T1Y, T21, T1X);
Chris@82 249 T24 = FNMS(T1Y, T1W, T23);
Chris@82 250 Rp[WS(rs, 1)] = T1S - T22;
Chris@82 251 Ip[WS(rs, 1)] = T24 + T26;
Chris@82 252 Rm[WS(rs, 1)] = T22 + T1S;
Chris@82 253 Im[WS(rs, 1)] = T24 - T26;
Chris@82 254 }
Chris@82 255 {
Chris@82 256 E T2c, T2m, T2i, T2k, T2g;
Chris@82 257 T2c = FNMS(T2a, T2b, T29);
Chris@82 258 T2m = FMA(T27, T2b, T2l);
Chris@82 259 T2g = W[17];
Chris@82 260 T2i = FMA(T2g, T2h, T2f);
Chris@82 261 T2k = FNMS(T2g, T2e, T2j);
Chris@82 262 Rp[WS(rs, 4)] = T2c - T2i;
Chris@82 263 Ip[WS(rs, 4)] = T2k + T2m;
Chris@82 264 Rm[WS(rs, 4)] = T2i + T2c;
Chris@82 265 Im[WS(rs, 4)] = T2k - T2m;
Chris@82 266 }
Chris@82 267 }
/*
 * Output slots 2 and 5: slot 2 uses W[6] .. W[9], slot 5 uses
 * W[18] .. W[21]; stores the four arrays at WS(rs, 2) and WS(rs, 5).
 */
Chris@82 268 {
Chris@82 269 E T2v, T2P, T2L, T2N, T2O, T2X, T2n, T2r, T2s, T2H, T2A, T2F, T2B, T2J, T2S;
Chris@82 270 E T2V, T2T, T2Z;
Chris@82 271 {
Chris@82 272 E T2t, T2u, T2M, T2q, T2x, T2R;
Chris@82 273 T2t = Tv - TC;
Chris@82 274 T2u = T13 - T18;
Chris@82 275 T2v = T2t + T2u;
Chris@82 276 T2P = T2t - T2u;
Chris@82 277 T2M = T2o - T2p;
Chris@82 278 T2L = W[18];
Chris@82 279 T2N = T2L * T2M;
Chris@82 280 T2O = W[19];
Chris@82 281 T2X = T2O * T2M;
Chris@82 282 T2q = T2o + T2p;
Chris@82 283 T2n = W[6];
Chris@82 284 T2r = T2n * T2q;
Chris@82 285 T2s = W[7];
Chris@82 286 T2H = T2s * T2q;
Chris@82 287 T2A = T2y + T2z;
Chris@82 288 T2F = T2D - T2E;
Chris@82 289 T2x = W[8];
Chris@82 290 T2B = T2x * T2A;
Chris@82 291 T2J = T2x * T2F;
Chris@82 292 T2S = T2y - T2z;
Chris@82 293 T2V = T2D + T2E;
Chris@82 294 T2R = W[20];
Chris@82 295 T2T = T2R * T2S;
Chris@82 296 T2Z = T2R * T2V;
Chris@82 297 }
Chris@82 298 {
Chris@82 299 E T2w, T2I, T2G, T2K, T2C;
Chris@82 300 T2w = FNMS(T2s, T2v, T2r);
Chris@82 301 T2I = FMA(T2n, T2v, T2H);
Chris@82 302 T2C = W[9];
Chris@82 303 T2G = FMA(T2C, T2F, T2B);
Chris@82 304 T2K = FNMS(T2C, T2A, T2J);
Chris@82 305 Rp[WS(rs, 2)] = T2w - T2G;
Chris@82 306 Ip[WS(rs, 2)] = T2I + T2K;
Chris@82 307 Rm[WS(rs, 2)] = T2w + T2G;
Chris@82 308 Im[WS(rs, 2)] = T2K - T2I;
Chris@82 309 }
Chris@82 310 {
Chris@82 311 E T2Q, T2Y, T2W, T30, T2U;
Chris@82 312 T2Q = FNMS(T2O, T2P, T2N);
Chris@82 313 T2Y = FMA(T2L, T2P, T2X);
Chris@82 314 T2U = W[21];
Chris@82 315 T2W = FMA(T2U, T2V, T2T);
Chris@82 316 T30 = FNMS(T2U, T2S, T2Z);
Chris@82 317 Rp[WS(rs, 5)] = T2Q - T2W;
Chris@82 318 Ip[WS(rs, 5)] = T2Y + T30;
Chris@82 319 Rm[WS(rs, 5)] = T2Q + T2W;
Chris@82 320 Im[WS(rs, 5)] = T30 - T2Y;
Chris@82 321 }
Chris@82 322 }
Chris@82 323 }
Chris@82 324 }
Chris@82 325 }
Chris@82 326
/*
 * tw_instr table referenced by desc below.  TW_FULL and TW_NEXT are defined
 * in the included headers, not here.
 * NOTE(review): presumably this describes the 22 W values the kernel
 * consumes per iteration -- confirm against the tw_instr definition.
 */
Chris@82 327 static const tw_instr twinstr[] = {
Chris@82 328 {TW_FULL, 1, 12},
Chris@82 329 {TW_NEXT, 1, 0}
Chris@82 330 };
Chris@82 331
/*
 * Descriptor passed to the registration call below: size 12, kernel name,
 * the twinstr table and &GENUS.  The trailing {96, 22, 46, 0} repeats the
 * addition / multiplication / fused multiply-add counts quoted in the
 * generator comment for this FMA variant.
 */
Chris@82 332 static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {96, 22, 46, 0} };
Chris@82 333
/*
 * Registration entry point: passes the FMA kernel, its descriptor and
 * HC2C_VIA_DFT to X(khc2c_register) for planner p.
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * confirm in the included headers.
 */
Chris@82 334 void X(codelet_hc2cbdft_12) (planner *p) {
Chris@82 335 X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT);
Chris@82 336 }
Chris@82 337 #else
Chris@82 338
Chris@82 339 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hc2cbdft_12 -include rdft/scalar/hc2cb.h */
Chris@82 340
Chris@82 341 /*
Chris@82 342 * This function contains 142 FP additions, 60 FP multiplications,
Chris@82 343 * (or, 112 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 344 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 345 */
Chris@82 346 #include "rdft/scalar/hc2cb.h"
Chris@82 347
/*
 * hc2cbdft_12, non-FMA variant (compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).  Generated code: change the
 * generator invocation quoted above rather than editing this body.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  data arrays, read and then overwritten in place at
 *                   offsets WS(rs, 0) .. WS(rs, 5) on every iteration
 *   W               coefficient table; 22 values (W[0] .. W[21]) are used
 *                   per iteration, starting at W + (mb - 1) * 22
 *   rs              stride handed to WS() to address the six elements
 *   mb, me          the loop runs for m = mb, mb + 1, ..., me - 1
 *   ms              per-iteration step: Rp and Ip advance by +ms,
 *                   Rm and Im advance by -ms
 *
 * All 24 inputs of an iteration are loaded before the first store, so the
 * in-place overwrite does not disturb values still needed in that iteration.
 * NOTE(review): FMA/FMS/FNMS/DK/WS/MAKE_VOLATILE_STRIDE come from the
 * included headers; their exact definitions are not visible in this file.
 */
Chris@82 348 static void hc2cbdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 349 {
/* KP500000000 is 1/2; KP866025403 is numerically sqrt(3)/2. */
Chris@82 350 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 351 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 352 {
Chris@82 353 INT m;
Chris@82 354 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
/* Intermediates that stay live from the load phase to the store phase. */
Chris@82 355 E Tv, T1E, TC, T1F, TW, T1x, TT, T1w, T1d, T1N, Tb, T1R, TI, T1z, TN;
Chris@82 356 E T1A, T17, T1I, T12, T1H, T1g, T1S, Tm, T1O;
/*
 * First input group: Rp[0], Rp[4], Rp[2], Rm[5], Rm[3], Rm[1] and
 * Ip[0], Ip[4], Ip[2], Im[5], Im[3], Im[1] (indices are WS(rs, k)),
 * combined into sums and differences scaled by the two constants.
 */
Chris@82 357 {
Chris@82 358 E T1, Tq, T6, TA, T4, Tp, Tt, TS, T9, Tw, Tz, TV;
Chris@82 359 T1 = Rp[0];
Chris@82 360 Tq = Ip[0];
Chris@82 361 T6 = Rm[WS(rs, 5)];
Chris@82 362 TA = Im[WS(rs, 5)];
Chris@82 363 {
Chris@82 364 E T2, T3, Tr, Ts;
Chris@82 365 T2 = Rp[WS(rs, 4)];
Chris@82 366 T3 = Rm[WS(rs, 3)];
Chris@82 367 T4 = T2 + T3;
Chris@82 368 Tp = KP866025403 * (T2 - T3);
Chris@82 369 Tr = Im[WS(rs, 3)];
Chris@82 370 Ts = Ip[WS(rs, 4)];
Chris@82 371 Tt = Tr - Ts;
Chris@82 372 TS = KP866025403 * (Tr + Ts);
Chris@82 373 }
Chris@82 374 {
Chris@82 375 E T7, T8, Tx, Ty;
Chris@82 376 T7 = Rm[WS(rs, 1)];
Chris@82 377 T8 = Rp[WS(rs, 2)];
Chris@82 378 T9 = T7 + T8;
Chris@82 379 Tw = KP866025403 * (T7 - T8);
Chris@82 380 Tx = Im[WS(rs, 1)];
Chris@82 381 Ty = Ip[WS(rs, 2)];
Chris@82 382 Tz = Tx - Ty;
Chris@82 383 TV = KP866025403 * (Tx + Ty);
Chris@82 384 }
Chris@82 385 {
Chris@82 386 E Tu, TB, TU, TR;
Chris@82 387 Tu = FMA(KP500000000, Tt, Tq);
Chris@82 388 Tv = Tp + Tu;
Chris@82 389 T1E = Tu - Tp;
Chris@82 390 TB = FMS(KP500000000, Tz, TA);
Chris@82 391 TC = Tw + TB;
Chris@82 392 T1F = TB - Tw;
Chris@82 393 TU = FNMS(KP500000000, T9, T6);
Chris@82 394 TW = TU + TV;
Chris@82 395 T1x = TU - TV;
Chris@82 396 TR = FNMS(KP500000000, T4, T1);
Chris@82 397 TT = TR - TS;
Chris@82 398 T1w = TR + TS;
Chris@82 399 {
Chris@82 400 E T1b, T1c, T5, Ta;
Chris@82 401 T1b = Tq - Tt;
Chris@82 402 T1c = Tz + TA;
Chris@82 403 T1d = T1b - T1c;
Chris@82 404 T1N = T1b + T1c;
Chris@82 405 T5 = T1 + T4;
Chris@82 406 Ta = T6 + T9;
Chris@82 407 Tb = T5 + Ta;
Chris@82 408 T1R = T5 - Ta;
Chris@82 409 }
Chris@82 410 }
Chris@82 411 }
/*
 * Second input group: Rp[3], Rp[1], Rp[5], Rm[2], Rm[4], Rm[0] and
 * Ip[3], Ip[5], Ip[1], Im[2], Im[4], Im[0], combined the same way.
 */
Chris@82 412 {
Chris@82 413 E Tc, T10, Th, T15, Tf, TY, TH, TZ, Tk, T13, TM, T14;
Chris@82 414 Tc = Rp[WS(rs, 3)];
Chris@82 415 T10 = Ip[WS(rs, 3)];
Chris@82 416 Th = Rm[WS(rs, 2)];
Chris@82 417 T15 = Im[WS(rs, 2)];
Chris@82 418 {
Chris@82 419 E Td, Te, TF, TG;
Chris@82 420 Td = Rm[WS(rs, 4)];
Chris@82 421 Te = Rm[0];
Chris@82 422 Tf = Td + Te;
Chris@82 423 TY = KP866025403 * (Td - Te);
Chris@82 424 TF = Im[WS(rs, 4)];
Chris@82 425 TG = Im[0];
Chris@82 426 TH = KP866025403 * (TF - TG);
Chris@82 427 TZ = TF + TG;
Chris@82 428 }
Chris@82 429 {
Chris@82 430 E Ti, Tj, TK, TL;
Chris@82 431 Ti = Rp[WS(rs, 1)];
Chris@82 432 Tj = Rp[WS(rs, 5)];
Chris@82 433 Tk = Ti + Tj;
Chris@82 434 T13 = KP866025403 * (Ti - Tj);
Chris@82 435 TK = Ip[WS(rs, 5)];
Chris@82 436 TL = Ip[WS(rs, 1)];
Chris@82 437 TM = KP866025403 * (TK - TL);
Chris@82 438 T14 = TK + TL;
Chris@82 439 }
Chris@82 440 {
Chris@82 441 E TE, TJ, T16, T11;
Chris@82 442 TE = FNMS(KP500000000, Tf, Tc);
Chris@82 443 TI = TE + TH;
Chris@82 444 T1z = TE - TH;
Chris@82 445 TJ = FNMS(KP500000000, Tk, Th);
Chris@82 446 TN = TJ + TM;
Chris@82 447 T1A = TJ - TM;
Chris@82 448 T16 = FMA(KP500000000, T14, T15);
Chris@82 449 T17 = T13 - T16;
Chris@82 450 T1I = T13 + T16;
Chris@82 451 T11 = FMA(KP500000000, TZ, T10);
Chris@82 452 T12 = TY + T11;
Chris@82 453 T1H = T11 - TY;
Chris@82 454 {
Chris@82 455 E T1e, T1f, Tg, Tl;
Chris@82 456 T1e = T10 - TZ;
Chris@82 457 T1f = T14 - T15;
Chris@82 458 T1g = T1e + T1f;
Chris@82 459 T1S = T1e - T1f;
Chris@82 460 Tg = Tc + Tf;
Chris@82 461 Tl = Th + Tk;
Chris@82 462 Tm = Tg + Tl;
Chris@82 463 T1O = Tg - Tl;
Chris@82 464 }
Chris@82 465 }
Chris@82 466 }
/*
 * Output slots 0 and 3: slot 0 uses W[0] and W[1], slot 3 uses
 * W[10] .. W[13]; stores the four arrays at offset 0 and WS(rs, 3).
 */
Chris@82 467 {
Chris@82 468 E Tn, T1h, TP, T1p, T19, T1r, T1n, T1t;
Chris@82 469 Tn = Tb + Tm;
Chris@82 470 T1h = T1d + T1g;
Chris@82 471 {
Chris@82 472 E TD, TO, TX, T18;
Chris@82 473 TD = Tv - TC;
Chris@82 474 TO = TI - TN;
Chris@82 475 TP = TD + TO;
Chris@82 476 T1p = TD - TO;
Chris@82 477 TX = TT - TW;
Chris@82 478 T18 = T12 - T17;
Chris@82 479 T19 = TX - T18;
Chris@82 480 T1r = TX + T18;
Chris@82 481 {
Chris@82 482 E T1k, T1m, T1j, T1l;
Chris@82 483 T1k = Tb - Tm;
Chris@82 484 T1m = T1d - T1g;
Chris@82 485 T1j = W[10];
Chris@82 486 T1l = W[11];
Chris@82 487 T1n = FNMS(T1l, T1m, T1j * T1k);
Chris@82 488 T1t = FMA(T1l, T1k, T1j * T1m);
Chris@82 489 }
Chris@82 490 }
Chris@82 491 {
Chris@82 492 E T1a, T1i, To, TQ;
Chris@82 493 To = W[0];
Chris@82 494 TQ = W[1];
Chris@82 495 T1a = FMA(To, TP, TQ * T19);
Chris@82 496 T1i = FNMS(TQ, TP, To * T19);
Chris@82 497 Rp[0] = Tn - T1a;
Chris@82 498 Ip[0] = T1h + T1i;
Chris@82 499 Rm[0] = Tn + T1a;
Chris@82 500 Im[0] = T1i - T1h;
Chris@82 501 }
Chris@82 502 {
Chris@82 503 E T1s, T1u, T1o, T1q;
Chris@82 504 T1o = W[12];
Chris@82 505 T1q = W[13];
Chris@82 506 T1s = FMA(T1o, T1p, T1q * T1r);
Chris@82 507 T1u = FNMS(T1q, T1p, T1o * T1r);
Chris@82 508 Rp[WS(rs, 3)] = T1n - T1s;
Chris@82 509 Ip[WS(rs, 3)] = T1t + T1u;
Chris@82 510 Rm[WS(rs, 3)] = T1n + T1s;
Chris@82 511 Im[WS(rs, 3)] = T1u - T1t;
Chris@82 512 }
Chris@82 513 }
/*
 * Output slots 1 and 4: slot 1 uses W[2] .. W[5], slot 4 uses
 * W[14] .. W[17]; stores the four arrays at WS(rs, 1) and WS(rs, 4).
 */
Chris@82 514 {
Chris@82 515 E T1C, T1Y, T1K, T20, T1U, T1V, T26, T27;
Chris@82 516 {
Chris@82 517 E T1y, T1B, T1G, T1J;
Chris@82 518 T1y = T1w + T1x;
Chris@82 519 T1B = T1z + T1A;
Chris@82 520 T1C = T1y - T1B;
Chris@82 521 T1Y = T1y + T1B;
Chris@82 522 T1G = T1E + T1F;
Chris@82 523 T1J = T1H - T1I;
Chris@82 524 T1K = T1G - T1J;
Chris@82 525 T20 = T1G + T1J;
Chris@82 526 }
Chris@82 527 {
Chris@82 528 E T1P, T1T, T1M, T1Q;
Chris@82 529 T1P = T1N - T1O;
Chris@82 530 T1T = T1R + T1S;
Chris@82 531 T1M = W[4];
Chris@82 532 T1Q = W[5];
Chris@82 533 T1U = FMA(T1M, T1P, T1Q * T1T);
Chris@82 534 T1V = FNMS(T1Q, T1P, T1M * T1T);
Chris@82 535 }
Chris@82 536 {
Chris@82 537 E T23, T25, T22, T24;
Chris@82 538 T23 = T1O + T1N;
Chris@82 539 T25 = T1R - T1S;
Chris@82 540 T22 = W[16];
Chris@82 541 T24 = W[17];
Chris@82 542 T26 = FMA(T22, T23, T24 * T25);
Chris@82 543 T27 = FNMS(T24, T23, T22 * T25);
Chris@82 544 }
Chris@82 545 {
Chris@82 546 E T1L, T1W, T1v, T1D;
Chris@82 547 T1v = W[2];
Chris@82 548 T1D = W[3];
Chris@82 549 T1L = FNMS(T1D, T1K, T1v * T1C);
Chris@82 550 T1W = FMA(T1D, T1C, T1v * T1K);
Chris@82 551 Rp[WS(rs, 1)] = T1L - T1U;
Chris@82 552 Ip[WS(rs, 1)] = T1V + T1W;
Chris@82 553 Rm[WS(rs, 1)] = T1U + T1L;
Chris@82 554 Im[WS(rs, 1)] = T1V - T1W;
Chris@82 555 }
Chris@82 556 {
Chris@82 557 E T21, T28, T1X, T1Z;
Chris@82 558 T1X = W[14];
Chris@82 559 T1Z = W[15];
Chris@82 560 T21 = FNMS(T1Z, T20, T1X * T1Y);
Chris@82 561 T28 = FMA(T1Z, T1Y, T1X * T20);
Chris@82 562 Rp[WS(rs, 4)] = T21 - T26;
Chris@82 563 Ip[WS(rs, 4)] = T27 + T28;
Chris@82 564 Rm[WS(rs, 4)] = T26 + T21;
Chris@82 565 Im[WS(rs, 4)] = T27 - T28;
Chris@82 566 }
Chris@82 567 }
/*
 * Output slots 2 and 5: slot 2 uses W[6] .. W[9], slot 5 uses
 * W[18] .. W[21]; stores the four arrays at WS(rs, 2) and WS(rs, 5).
 */
Chris@82 568 {
Chris@82 569 E T2c, T2u, T2p, T2B, T2g, T2w, T2l, T2z;
Chris@82 570 {
Chris@82 571 E T2a, T2b, T2n, T2o;
Chris@82 572 T2a = TT + TW;
Chris@82 573 T2b = TI + TN;
Chris@82 574 T2c = T2a + T2b;
Chris@82 575 T2u = T2a - T2b;
Chris@82 576 T2n = T1w - T1x;
Chris@82 577 T2o = T1H + T1I;
Chris@82 578 T2p = T2n - T2o;
Chris@82 579 T2B = T2n + T2o;
Chris@82 580 }
Chris@82 581 {
Chris@82 582 E T2e, T2f, T2j, T2k;
Chris@82 583 T2e = Tv + TC;
Chris@82 584 T2f = T12 + T17;
Chris@82 585 T2g = T2e + T2f;
Chris@82 586 T2w = T2e - T2f;
Chris@82 587 T2j = T1E - T1F;
Chris@82 588 T2k = T1z - T1A;
Chris@82 589 T2l = T2j + T2k;
Chris@82 590 T2z = T2j - T2k;
Chris@82 591 }
Chris@82 592 {
Chris@82 593 E T2h, T2r, T2q, T2s;
Chris@82 594 {
Chris@82 595 E T29, T2d, T2i, T2m;
Chris@82 596 T29 = W[6];
Chris@82 597 T2d = W[7];
Chris@82 598 T2h = FNMS(T2d, T2g, T29 * T2c);
Chris@82 599 T2r = FMA(T2d, T2c, T29 * T2g);
Chris@82 600 T2i = W[8];
Chris@82 601 T2m = W[9];
Chris@82 602 T2q = FMA(T2i, T2l, T2m * T2p);
Chris@82 603 T2s = FNMS(T2m, T2l, T2i * T2p);
Chris@82 604 }
Chris@82 605 Rp[WS(rs, 2)] = T2h - T2q;
Chris@82 606 Ip[WS(rs, 2)] = T2r + T2s;
Chris@82 607 Rm[WS(rs, 2)] = T2h + T2q;
Chris@82 608 Im[WS(rs, 2)] = T2s - T2r;
Chris@82 609 }
Chris@82 610 {
Chris@82 611 E T2x, T2D, T2C, T2E;
Chris@82 612 {
Chris@82 613 E T2t, T2v, T2y, T2A;
Chris@82 614 T2t = W[18];
Chris@82 615 T2v = W[19];
Chris@82 616 T2x = FNMS(T2v, T2w, T2t * T2u);
Chris@82 617 T2D = FMA(T2v, T2u, T2t * T2w);
Chris@82 618 T2y = W[20];
Chris@82 619 T2A = W[21];
Chris@82 620 T2C = FMA(T2y, T2z, T2A * T2B);
Chris@82 621 T2E = FNMS(T2A, T2z, T2y * T2B);
Chris@82 622 }
Chris@82 623 Rp[WS(rs, 5)] = T2x - T2C;
Chris@82 624 Ip[WS(rs, 5)] = T2D + T2E;
Chris@82 625 Rm[WS(rs, 5)] = T2x + T2C;
Chris@82 626 Im[WS(rs, 5)] = T2E - T2D;
Chris@82 627 }
Chris@82 628 }
Chris@82 629 }
Chris@82 630 }
Chris@82 631 }
Chris@82 632
/*
 * tw_instr table referenced by desc below.  TW_FULL and TW_NEXT are defined
 * in the included headers, not here.
 * NOTE(review): presumably this describes the 22 W values the kernel
 * consumes per iteration -- confirm against the tw_instr definition.
 */
Chris@82 633 static const tw_instr twinstr[] = {
Chris@82 634 {TW_FULL, 1, 12},
Chris@82 635 {TW_NEXT, 1, 0}
Chris@82 636 };
Chris@82 637
/*
 * Descriptor passed to the registration call below: size 12, kernel name,
 * the twinstr table and &GENUS.  The trailing {112, 30, 30, 0} repeats the
 * addition / multiplication / fused multiply-add counts quoted in the
 * generator comment for this non-FMA variant.
 */
Chris@82 638 static const hc2c_desc desc = { 12, "hc2cbdft_12", twinstr, &GENUS, {112, 30, 30, 0} };
Chris@82 639
/*
 * Registration entry point: passes the non-FMA kernel, its descriptor and
 * HC2C_VIA_DFT to X(khc2c_register) for planner p.
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * confirm in the included headers.
 */
Chris@82 640 void X(codelet_hc2cbdft_12) (planner *p) {
Chris@82 641 X(khc2c_register) (p, hc2cbdft_12, &desc, HC2C_VIA_DFT);
Chris@82 642 }
Chris@82 643 #endif