annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cfdft_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:11 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cfdft_12 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 142 FP additions, 92 FP multiplications,
Chris@82 32 * (or, 96 additions, 46 multiplications, 46 fused multiply/add),
Chris@82 33 * 65 stack variables, 2 constants, and 48 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cfdft_12, variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.  Per the generator command line
 * recorded in the comment preceding this function, it is a size-12 (-n 12),
 * decimation-in-time (-dit) kernel emitted by gen_hc2cdft with -fma.
 *
 * Rp, Ip, Rm, Im : four data arrays, read and then overwritten in place.
 *                  Each iteration touches index 0 and WS(rs, 1) .. WS(rs, 5)
 *                  of every array (24 loads, 24 stores).
 *                  NOTE(review): presumably the real/imaginary parts of the
 *                  two halves of a halfcomplex array -- confirm against
 *                  rdft/codelet-rdft.h.
 * W              : read-only twiddle table; W[0] .. W[21] are consumed per
 *                  iteration.
 * rs             : stride handed to WS() to address the six elements.
 * mb, me         : the loop runs for m = mb while m < me.
 * ms             : per-iteration step; Rp/Ip advance by ms, Rm/Im retreat
 *                  by ms.
 *
 * All loads happen in the first two stages and all stores in the last two,
 * so no input is overwritten before it has been read within an iteration.
 *
 * FMA() and FNMS() are defined outside this file; the comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- TODO confirm.
 */
Chris@82 37 static void hc2cfdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants sqrt(3)/2 and 1/2 (DK is a macro defined outside this file). */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
/*
 * W is first offset by (mb - 1) * 22 values and then stepped by 22 per
 * iteration, matching the 22 twiddle values read below.
 * MAKE_VOLATILE_STRIDE is defined outside this file.
 */
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
/* Intermediates produced by the two load stages and consumed by the two store stages. */
Chris@82 44 E To, T1E, T1m, T2H, Ta, T1G, Tk, T1I, Tl, T1J, T1s, T2b, T1A, T2d, T1B;
Chris@82 45 E T2I, T12, T18, T19, T24, T26, T2C, Tz, T1M, T1f, T2B, TJ, T1O, TT, T1Q;
Chris@82 46 E TU, T1R;
/*
 * Load stage 1: elements 0, 2 and 4.  For each element the sums and
 * differences Ip +/- Im and Rp +/- Rm are formed and combined with the
 * twiddle pairs W[0..1], W[6..9] and W[14..17].  To and T1E (element 0,
 * Ip - Im and Rp + Rm) are used without a twiddle.
 */
Chris@82 47 {
Chris@82 48 E Tm, Tn, T1u, T1x, T1y, T1z, T1v, T2c, Te, Tj, T1i, T1l, Tf, T1H, T4;
Chris@82 49 E T1o, T9, T1r, T5, T1F, T1p, T2a, T1t, T1, T1n;
Chris@82 50 Tm = Ip[0];
Chris@82 51 Tn = Im[0];
Chris@82 52 T1u = Tm + Tn;
Chris@82 53 T1x = Rp[0];
Chris@82 54 T1y = Rm[0];
Chris@82 55 T1z = T1x - T1y;
Chris@82 56 T1t = W[0];
Chris@82 57 T1v = T1t * T1u;
Chris@82 58 T2c = T1t * T1z;
Chris@82 59 {
Chris@82 60 E Tc, Td, Th, Ti, Tb;
Chris@82 61 Tc = Ip[WS(rs, 4)];
Chris@82 62 Td = Im[WS(rs, 4)];
Chris@82 63 Te = Tc - Td;
Chris@82 64 Th = Rp[WS(rs, 4)];
Chris@82 65 Ti = Rm[WS(rs, 4)];
Chris@82 66 Tj = Th + Ti;
Chris@82 67 T1i = Tc + Td;
Chris@82 68 T1l = Th - Ti;
Chris@82 69 Tb = W[14];
Chris@82 70 Tf = Tb * Te;
Chris@82 71 T1H = Tb * Tj;
Chris@82 72 }
Chris@82 73 {
Chris@82 74 E T2, T3, T7, T8;
Chris@82 75 T2 = Ip[WS(rs, 2)];
Chris@82 76 T3 = Im[WS(rs, 2)];
Chris@82 77 T4 = T2 - T3;
Chris@82 78 T1o = T2 + T3;
Chris@82 79 T7 = Rp[WS(rs, 2)];
Chris@82 80 T8 = Rm[WS(rs, 2)];
Chris@82 81 T9 = T7 + T8;
Chris@82 82 T1r = T7 - T8;
Chris@82 83 }
Chris@82 84 T1 = W[6];
Chris@82 85 T5 = T1 * T4;
Chris@82 86 T1F = T1 * T9;
Chris@82 87 T1n = W[8];
Chris@82 88 T1p = T1n * T1o;
Chris@82 89 T2a = T1n * T1r;
Chris@82 90 To = Tm - Tn;
Chris@82 91 T1E = T1x + T1y;
/* Element 4, (Ip + Im, Rp - Rm) combined with W[16], W[17]. */
Chris@82 92 {
Chris@82 93 E T1j, T2G, T1h, T1k;
Chris@82 94 T1h = W[16];
Chris@82 95 T1j = T1h * T1i;
Chris@82 96 T2G = T1h * T1l;
Chris@82 97 T1k = W[17];
Chris@82 98 T1m = FNMS(T1k, T1l, T1j);
Chris@82 99 T2H = FMA(T1k, T1i, T2G);
Chris@82 100 }
/* Finish the products started above with the odd-indexed twiddles W[7], W[15], W[9], W[1]. */
Chris@82 101 {
Chris@82 102 E T6, Tg, T1q, T1w;
Chris@82 103 T6 = W[7];
Chris@82 104 Ta = FNMS(T6, T9, T5);
Chris@82 105 T1G = FMA(T6, T4, T1F);
Chris@82 106 Tg = W[15];
Chris@82 107 Tk = FNMS(Tg, Tj, Tf);
Chris@82 108 T1I = FMA(Tg, Te, T1H);
Chris@82 109 Tl = Ta + Tk;
Chris@82 110 T1J = T1G + T1I;
Chris@82 111 T1q = W[9];
Chris@82 112 T1s = FNMS(T1q, T1r, T1p);
Chris@82 113 T2b = FMA(T1q, T1o, T2a);
Chris@82 114 T1w = W[1];
Chris@82 115 T1A = FNMS(T1w, T1z, T1v);
Chris@82 116 T2d = FMA(T1w, T1u, T2c);
Chris@82 117 T1B = T1s + T1A;
Chris@82 118 T2I = T2b + T2d;
Chris@82 119 }
Chris@82 120 }
/*
 * Load stage 2: elements 3, 1 and 5, combined with the twiddle pairs
 * W[2..5], W[10..13] and W[18..21].  For elements 3 and 5 the R difference
 * is taken as Rm - Rp (TY, T14), the reverse of the order used in stage 1.
 */
Chris@82 121 {
Chris@82 122 E Tt, T11, Ty, T10, T23, TX, TZ, TN, TS, T1b, T1e, TO, T1P, TD, TI;
Chris@82 123 E T17, T16, T25, T13, T15, TE, T1N, TF, TP;
Chris@82 124 {
Chris@82 125 E Tr, Ts, Tw, Tx, TY;
Chris@82 126 Tr = Ip[WS(rs, 3)];
Chris@82 127 Ts = Im[WS(rs, 3)];
Chris@82 128 Tt = Tr - Ts;
Chris@82 129 T11 = Tr + Ts;
Chris@82 130 Tw = Rp[WS(rs, 3)];
Chris@82 131 Tx = Rm[WS(rs, 3)];
Chris@82 132 TY = Tx - Tw;
Chris@82 133 Ty = Tw + Tx;
Chris@82 134 T10 = W[12];
Chris@82 135 T23 = T10 * TY;
Chris@82 136 TX = W[13];
Chris@82 137 TZ = TX * TY;
Chris@82 138 }
Chris@82 139 {
Chris@82 140 E TL, TM, TQ, TR, TK;
Chris@82 141 TL = Ip[WS(rs, 1)];
Chris@82 142 TM = Im[WS(rs, 1)];
Chris@82 143 TN = TL - TM;
Chris@82 144 TQ = Rp[WS(rs, 1)];
Chris@82 145 TR = Rm[WS(rs, 1)];
Chris@82 146 TS = TQ + TR;
Chris@82 147 T1b = TL + TM;
Chris@82 148 T1e = TQ - TR;
Chris@82 149 TK = W[2];
Chris@82 150 TO = TK * TN;
Chris@82 151 T1P = TK * TS;
Chris@82 152 }
Chris@82 153 {
Chris@82 154 E TB, TC, T14, TG, TH, TA;
Chris@82 155 TB = Ip[WS(rs, 5)];
Chris@82 156 TC = Im[WS(rs, 5)];
Chris@82 157 TD = TB - TC;
Chris@82 158 TG = Rp[WS(rs, 5)];
Chris@82 159 TH = Rm[WS(rs, 5)];
Chris@82 160 TI = TG + TH;
Chris@82 161 T14 = TH - TG;
Chris@82 162 T17 = TB + TC;
Chris@82 163 T16 = W[20];
Chris@82 164 T25 = T16 * T14;
Chris@82 165 T13 = W[21];
Chris@82 166 T15 = T13 * T14;
Chris@82 167 TA = W[18];
Chris@82 168 TE = TA * TD;
Chris@82 169 T1N = TA * TI;
Chris@82 170 }
Chris@82 171 T12 = FMA(T10, T11, TZ);
Chris@82 172 T18 = FMA(T16, T17, T15);
Chris@82 173 T19 = T12 + T18;
Chris@82 174 T24 = FNMS(TX, T11, T23);
Chris@82 175 T26 = FNMS(T13, T17, T25);
Chris@82 176 T2C = T24 + T26;
Chris@82 177 {
Chris@82 178 E Tu, T1L, Tq, Tv;
Chris@82 179 Tq = W[10];
Chris@82 180 Tu = Tq * Tt;
Chris@82 181 T1L = Tq * Ty;
Chris@82 182 Tv = W[11];
Chris@82 183 Tz = FNMS(Tv, Ty, Tu);
Chris@82 184 T1M = FMA(Tv, Tt, T1L);
Chris@82 185 }
Chris@82 186 {
Chris@82 187 E T1c, T2A, T1a, T1d;
Chris@82 188 T1a = W[4];
Chris@82 189 T1c = T1a * T1b;
Chris@82 190 T2A = T1a * T1e;
Chris@82 191 T1d = W[5];
Chris@82 192 T1f = FNMS(T1d, T1e, T1c);
Chris@82 193 T2B = FMA(T1d, T1b, T2A);
Chris@82 194 }
Chris@82 195 TF = W[19];
Chris@82 196 TJ = FNMS(TF, TI, TE);
Chris@82 197 T1O = FMA(TF, TD, T1N);
Chris@82 198 TP = W[3];
Chris@82 199 TT = FNMS(TP, TS, TO);
Chris@82 200 T1Q = FMA(TP, TN, T1P);
Chris@82 201 TU = TJ + TT;
Chris@82 202 T1R = T1O + T1Q;
Chris@82 203 }
/*
 * Store stage 1: eight outputs built only from plain sums/differences of
 * the twiddled values, each scaled by 1/2:
 * Rp[0], Ip[0], Rp[3], Ip[3], Rm[2], Im[2], Rm[5], Im[5] (indices via WS).
 */
Chris@82 204 {
Chris@82 205 E TW, T2V, T2Y, T30, T1D, T1U, T1T, T2Z;
Chris@82 206 {
Chris@82 207 E Tp, TV, T2W, T2X;
Chris@82 208 Tp = Tl + To;
Chris@82 209 TV = Tz + TU;
Chris@82 210 TW = Tp - TV;
Chris@82 211 T2V = TV + Tp;
Chris@82 212 T2W = T2C - T2B;
Chris@82 213 T2X = T2H + T2I;
Chris@82 214 T2Y = T2W - T2X;
Chris@82 215 T30 = T2W + T2X;
Chris@82 216 }
Chris@82 217 {
Chris@82 218 E T1g, T1C, T1K, T1S;
Chris@82 219 T1g = T19 + T1f;
Chris@82 220 T1C = T1m + T1B;
Chris@82 221 T1D = T1g - T1C;
Chris@82 222 T1U = T1g + T1C;
Chris@82 223 T1K = T1E + T1J;
Chris@82 224 T1S = T1M + T1R;
Chris@82 225 T1T = T1K + T1S;
Chris@82 226 T2Z = T1K - T1S;
Chris@82 227 }
Chris@82 228 Ip[WS(rs, 3)] = KP500000000 * (TW + T1D);
Chris@82 229 Rp[WS(rs, 3)] = KP500000000 * (T2Z - T30);
Chris@82 230 Im[WS(rs, 2)] = KP500000000 * (T1D - TW);
Chris@82 231 Rm[WS(rs, 2)] = KP500000000 * (T2Z + T30);
Chris@82 232 Rm[WS(rs, 5)] = KP500000000 * (T1T - T1U);
Chris@82 233 Im[WS(rs, 5)] = KP500000000 * (T2Y - T2V);
Chris@82 234 Rp[0] = KP500000000 * (T1T + T1U);
Chris@82 235 Ip[0] = KP500000000 * (T2V + T2Y);
Chris@82 236 }
/*
 * Store stage 2: the remaining sixteen outputs.  The first four inner
 * blocks build pairs of the form x -/+ KP866025403 * y, where x itself
 * mixes two stage values with a factor of 1/2; the last four inner blocks
 * combine those pairs and store them, again scaled by 1/2.
 */
Chris@82 237 {
Chris@82 238 E T1X, T2v, T2F, T2Q, T2L, T2R, T20, T2w, T28, T2t, T2j, T2p, T2m, T2q, T2f;
Chris@82 239 E T2s;
Chris@82 240 {
Chris@82 241 E T1V, T1W, T2D, T2E;
Chris@82 242 T1V = FNMS(KP500000000, T1J, T1E);
Chris@82 243 T1W = Ta - Tk;
Chris@82 244 T1X = FNMS(KP866025403, T1W, T1V);
Chris@82 245 T2v = FMA(KP866025403, T1W, T1V);
Chris@82 246 T2D = FMA(KP500000000, T2C, T2B);
Chris@82 247 T2E = T18 - T12;
Chris@82 248 T2F = FNMS(KP866025403, T2E, T2D);
Chris@82 249 T2Q = FMA(KP866025403, T2E, T2D);
Chris@82 250 }
Chris@82 251 {
Chris@82 252 E T2J, T2K, T1Y, T1Z;
Chris@82 253 T2J = FNMS(KP500000000, T2I, T2H);
Chris@82 254 T2K = T1s - T1A;
Chris@82 255 T2L = FNMS(KP866025403, T2K, T2J);
Chris@82 256 T2R = FMA(KP866025403, T2K, T2J);
Chris@82 257 T1Y = FNMS(KP500000000, T1R, T1M);
Chris@82 258 T1Z = TJ - TT;
Chris@82 259 T20 = FNMS(KP866025403, T1Z, T1Y);
Chris@82 260 T2w = FMA(KP866025403, T1Z, T1Y);
Chris@82 261 }
Chris@82 262 {
Chris@82 263 E T22, T27, T2h, T2i;
Chris@82 264 T22 = FNMS(KP500000000, T19, T1f);
Chris@82 265 T27 = T24 - T26;
Chris@82 266 T28 = FNMS(KP866025403, T27, T22);
Chris@82 267 T2t = FMA(KP866025403, T27, T22);
Chris@82 268 T2h = FNMS(KP500000000, Tl, To);
Chris@82 269 T2i = T1I - T1G;
Chris@82 270 T2j = FNMS(KP866025403, T2i, T2h);
Chris@82 271 T2p = FMA(KP866025403, T2i, T2h);
Chris@82 272 }
Chris@82 273 {
Chris@82 274 E T2k, T2l, T29, T2e;
Chris@82 275 T2k = FNMS(KP500000000, TU, Tz);
Chris@82 276 T2l = T1Q - T1O;
Chris@82 277 T2m = FNMS(KP866025403, T2l, T2k);
Chris@82 278 T2q = FMA(KP866025403, T2l, T2k);
Chris@82 279 T29 = FNMS(KP500000000, T1B, T1m);
Chris@82 280 T2e = T2b - T2d;
Chris@82 281 T2f = FNMS(KP866025403, T2e, T29);
Chris@82 282 T2s = FMA(KP866025403, T2e, T29);
Chris@82 283 }
/* Stores to Rp[2], Rm[3], Ip[2], Im[3]. */
Chris@82 284 {
Chris@82 285 E T21, T2g, T2P, T2S;
Chris@82 286 T21 = T1X + T20;
Chris@82 287 T2g = T28 + T2f;
Chris@82 288 Rp[WS(rs, 2)] = KP500000000 * (T21 - T2g);
Chris@82 289 Rm[WS(rs, 3)] = KP500000000 * (T21 + T2g);
Chris@82 290 T2P = T2m + T2j;
Chris@82 291 T2S = T2Q + T2R;
Chris@82 292 Ip[WS(rs, 2)] = KP500000000 * (T2P + T2S);
Chris@82 293 Im[WS(rs, 3)] = KP500000000 * (T2S - T2P);
Chris@82 294 }
/* Stores to Ip[5], Im[0], Rm[0], Rp[5]. */
Chris@82 295 {
Chris@82 296 E T2n, T2o, T2T, T2U;
Chris@82 297 T2n = T2j - T2m;
Chris@82 298 T2o = T2f - T28;
Chris@82 299 Ip[WS(rs, 5)] = KP500000000 * (T2n + T2o);
Chris@82 300 Im[0] = KP500000000 * (T2o - T2n);
Chris@82 301 T2T = T1X - T20;
Chris@82 302 T2U = T2R - T2Q;
Chris@82 303 Rm[0] = KP500000000 * (T2T - T2U);
Chris@82 304 Rp[WS(rs, 5)] = KP500000000 * (T2T + T2U);
Chris@82 305 }
/* Stores to Ip[1], Im[4], Rm[4], Rp[1]. */
Chris@82 306 {
Chris@82 307 E T2r, T2u, T2N, T2O;
Chris@82 308 T2r = T2p - T2q;
Chris@82 309 T2u = T2s - T2t;
Chris@82 310 Ip[WS(rs, 1)] = KP500000000 * (T2r + T2u);
Chris@82 311 Im[WS(rs, 4)] = KP500000000 * (T2u - T2r);
Chris@82 312 T2N = T2v - T2w;
Chris@82 313 T2O = T2L - T2F;
Chris@82 314 Rm[WS(rs, 4)] = KP500000000 * (T2N - T2O);
Chris@82 315 Rp[WS(rs, 1)] = KP500000000 * (T2N + T2O);
Chris@82 316 }
/* Stores to Rm[1], Rp[4], Ip[4], Im[1]. */
Chris@82 317 {
Chris@82 318 E T2x, T2y, T2z, T2M;
Chris@82 319 T2x = T2v + T2w;
Chris@82 320 T2y = T2t + T2s;
Chris@82 321 Rm[WS(rs, 1)] = KP500000000 * (T2x - T2y);
Chris@82 322 Rp[WS(rs, 4)] = KP500000000 * (T2x + T2y);
Chris@82 323 T2z = T2q + T2p;
Chris@82 324 T2M = T2F + T2L;
Chris@82 325 Ip[WS(rs, 4)] = KP500000000 * (T2z - T2M);
/* The only store in this function written with an explicit negation. */
Chris@82 326 Im[WS(rs, 1)] = -(KP500000000 * (T2z + T2M));
Chris@82 327 }
Chris@82 328 }
Chris@82 329 }
Chris@82 330 }
Chris@82 331 }
Chris@82 332
/*
 * Twiddle instruction table referenced by desc (FMA branch): a
 * {TW_FULL, 1, 12} entry followed by a {TW_NEXT, 1, 0} entry.
 * NOTE(review): presumably TW_FULL requests the full twiddle set for size 12
 * and TW_NEXT ends the list -- confirm against the tw_instr definition,
 * which lives outside this file.
 */
Chris@82 333 static const tw_instr twinstr[] = {
Chris@82 334 {TW_FULL, 1, 12},
Chris@82 335 {TW_NEXT, 1, 0}
Chris@82 336 };
Chris@82 337
/*
 * Codelet descriptor (FMA branch): size 12, name string, the twiddle table
 * and &GENUS (GENUS is defined outside this file).  The trailing
 * {96, 46, 46, 0} repeats the "96 additions, 46 multiplications, 46 fused
 * multiply/add" figures from the generated comment for this branch; the
 * meaning of the fourth field is not visible here.
 */
Chris@82 338 static const hc2c_desc desc = { 12, "hc2cfdft_12", twinstr, &GENUS, {96, 46, 46, 0} };
Chris@82 339
/*
 * Registration entry point (FMA branch): hands the kernel hc2cfdft_12 and
 * its descriptor to the planner p through X(khc2c_register), tagged
 * HC2C_VIA_DFT.  X() is a macro defined outside this file
 * (NOTE(review): presumably a symbol-prefixing macro -- confirm).
 */
Chris@82 340 void X(codelet_hc2cfdft_12) (planner *p) {
Chris@82 341 X(khc2c_register) (p, hc2cfdft_12, &desc, HC2C_VIA_DFT);
Chris@82 342 }
Chris@82 343 #else
Chris@82 344
Chris@82 345 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cfdft_12 -include rdft/scalar/hc2cf.h */
Chris@82 346
Chris@82 347 /*
Chris@82 348 * This function contains 142 FP additions, 76 FP multiplications,
Chris@82 349 * (or, 112 additions, 46 multiplications, 30 fused multiply/add),
Chris@82 350 * 52 stack variables, 3 constants, and 48 memory accesses
Chris@82 351 */
Chris@82 352 #include "rdft/scalar/hc2cf.h"
Chris@82 353
/*
 * hc2cfdft_12, variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.  Per the generator command line
 * recorded in the comment preceding this function, it is a size-12 (-n 12),
 * decimation-in-time (-dit) kernel emitted by gen_hc2cdft without -fma.
 *
 * Rp, Ip, Rm, Im : four data arrays, read and then overwritten in place.
 *                  Each iteration touches index 0 and WS(rs, 1) .. WS(rs, 5)
 *                  of every array (24 loads, 24 stores).
 *                  NOTE(review): presumably the real/imaginary parts of the
 *                  two halves of a halfcomplex array -- confirm against
 *                  rdft/codelet-rdft.h.
 * W              : read-only twiddle table; W[0] .. W[21] are consumed per
 *                  iteration.
 * rs             : stride handed to WS() to address the six elements.
 * mb, me         : the loop runs for m = mb while m < me.
 * ms             : per-iteration step; Rp/Ip advance by ms, Rm/Im retreat
 *                  by ms.
 *
 * All loads happen in the first two stages and all stores in the last two,
 * so no input is overwritten before it has been read within an iteration.
 *
 * FMA() and FNMS() are defined outside this file; the comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- TODO confirm.
 */
Chris@82 354 static void hc2cfdft_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 355 {
/* Numeric constants 1/4, 1/2 and sqrt(3)/4 (DK is a macro defined outside this file). */
Chris@82 356 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 357 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 358 DK(KP433012701, +0.433012701892219323381861585376468091735701313);
Chris@82 359 {
Chris@82 360 INT m;
/*
 * W is first offset by (mb - 1) * 22 values and then stepped by 22 per
 * iteration, matching the 22 twiddle values read below.
 * MAKE_VOLATILE_STRIDE is defined outside this file.
 */
Chris@82 361 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
/* Intermediates produced by the two load stages and consumed by the two store stages. */
Chris@82 362 E Tm, T1t, T1d, T2j, Tj, T1Y, T1w, T1G, T1q, T2q, T1U, T2k, Tw, T1y, T17;
Chris@82 363 E T2g, TP, T21, T1B, T1J, T12, T2u, T1P, T2h;
/*
 * Load stage 1: elements 0, 2 and 4.  Sums and differences Ip +/- Im and
 * Rp +/- Rm are formed and combined with the twiddle pairs W[0..1],
 * W[6..9] and W[14..17].  Tm and T1t (element 0, Ip - Im and Rp + Rm) are
 * used without a twiddle.  The KP433012701-scaled differences (T1Y, T1G,
 * T2q, T1U) are prepared here for store stage 2.
 */
Chris@82 364 {
Chris@82 365 E Tk, Tl, T1k, T1m, T1n, T1o, T4, T1f, T8, T1h, Th, T1c, Td, T1a, T19;
Chris@82 366 E T1b;
Chris@82 367 {
Chris@82 368 E T2, T3, T6, T7;
Chris@82 369 Tk = Ip[0];
Chris@82 370 Tl = Im[0];
Chris@82 371 T1k = Tk + Tl;
Chris@82 372 T1m = Rp[0];
Chris@82 373 T1n = Rm[0];
Chris@82 374 T1o = T1m - T1n;
Chris@82 375 T2 = Ip[WS(rs, 2)];
Chris@82 376 T3 = Im[WS(rs, 2)];
Chris@82 377 T4 = T2 - T3;
Chris@82 378 T1f = T2 + T3;
Chris@82 379 T6 = Rp[WS(rs, 2)];
Chris@82 380 T7 = Rm[WS(rs, 2)];
Chris@82 381 T8 = T6 + T7;
Chris@82 382 T1h = T6 - T7;
Chris@82 383 {
Chris@82 384 E Tf, Tg, Tb, Tc;
Chris@82 385 Tf = Rp[WS(rs, 4)];
Chris@82 386 Tg = Rm[WS(rs, 4)];
Chris@82 387 Th = Tf + Tg;
Chris@82 388 T1c = Tf - Tg;
Chris@82 389 Tb = Ip[WS(rs, 4)];
Chris@82 390 Tc = Im[WS(rs, 4)];
Chris@82 391 Td = Tb - Tc;
Chris@82 392 T1a = Tb + Tc;
Chris@82 393 }
Chris@82 394 }
Chris@82 395 Tm = Tk - Tl;
Chris@82 396 T1t = T1m + T1n;
/* Element 4, (Ip + Im, Rp - Rm) combined with W[16], W[17]. */
Chris@82 397 T19 = W[16];
Chris@82 398 T1b = W[17];
Chris@82 399 T1d = FNMS(T1b, T1c, T19 * T1a);
Chris@82 400 T2j = FMA(T19, T1c, T1b * T1a);
/* Elements 2 and 4, (Ip - Im, Rp + Rm) combined with W[6..7] and W[14..15]. */
Chris@82 401 {
Chris@82 402 E T9, T1u, Ti, T1v;
Chris@82 403 {
Chris@82 404 E T1, T5, Ta, Te;
Chris@82 405 T1 = W[6];
Chris@82 406 T5 = W[7];
Chris@82 407 T9 = FNMS(T5, T8, T1 * T4);
Chris@82 408 T1u = FMA(T1, T8, T5 * T4);
Chris@82 409 Ta = W[14];
Chris@82 410 Te = W[15];
Chris@82 411 Ti = FNMS(Te, Th, Ta * Td);
Chris@82 412 T1v = FMA(Ta, Th, Te * Td);
Chris@82 413 }
Chris@82 414 Tj = T9 + Ti;
Chris@82 415 T1Y = KP433012701 * (T1v - T1u);
Chris@82 416 T1w = T1u + T1v;
Chris@82 417 T1G = KP433012701 * (T9 - Ti);
Chris@82 418 }
/* Elements 2 and 0, (Ip + Im, Rp - Rm) combined with W[8..9] and W[0..1]. */
Chris@82 419 {
Chris@82 420 E T1i, T1S, T1p, T1T;
Chris@82 421 {
Chris@82 422 E T1e, T1g, T1j, T1l;
Chris@82 423 T1e = W[8];
Chris@82 424 T1g = W[9];
Chris@82 425 T1i = FNMS(T1g, T1h, T1e * T1f);
Chris@82 426 T1S = FMA(T1e, T1h, T1g * T1f);
Chris@82 427 T1j = W[0];
Chris@82 428 T1l = W[1];
Chris@82 429 T1p = FNMS(T1l, T1o, T1j * T1k);
Chris@82 430 T1T = FMA(T1j, T1o, T1l * T1k);
Chris@82 431 }
Chris@82 432 T1q = T1i + T1p;
Chris@82 433 T2q = KP433012701 * (T1i - T1p);
Chris@82 434 T1U = KP433012701 * (T1S - T1T);
Chris@82 435 T2k = T1S + T1T;
Chris@82 436 }
Chris@82 437 }
/*
 * Load stage 2: elements 3, 5 and 1, combined with the twiddle pairs
 * W[2..5], W[10..13] and W[18..21].  For element 1 the R difference is
 * taken as Rm - Rp (T14), the reverse of the order used elsewhere.
 * KP433012701-scaled differences (T21, T1J, T2u, T1P) are prepared here
 * for store stage 2.
 */
Chris@82 438 {
Chris@82 439 E Tr, TT, Tv, TV, TA, TY, TE, T10, TN, T14, TJ, T16;
Chris@82 440 {
Chris@82 441 E Tp, Tq, TC, TD;
Chris@82 442 Tp = Ip[WS(rs, 3)];
Chris@82 443 Tq = Im[WS(rs, 3)];
Chris@82 444 Tr = Tp - Tq;
Chris@82 445 TT = Tp + Tq;
Chris@82 446 {
Chris@82 447 E Tt, Tu, Ty, Tz;
Chris@82 448 Tt = Rp[WS(rs, 3)];
Chris@82 449 Tu = Rm[WS(rs, 3)];
Chris@82 450 Tv = Tt + Tu;
Chris@82 451 TV = Tt - Tu;
Chris@82 452 Ty = Ip[WS(rs, 5)];
Chris@82 453 Tz = Im[WS(rs, 5)];
Chris@82 454 TA = Ty - Tz;
Chris@82 455 TY = Ty + Tz;
Chris@82 456 }
Chris@82 457 TC = Rp[WS(rs, 5)];
Chris@82 458 TD = Rm[WS(rs, 5)];
Chris@82 459 TE = TC + TD;
Chris@82 460 T10 = TC - TD;
Chris@82 461 {
Chris@82 462 E TL, TM, TH, TI;
Chris@82 463 TL = Rp[WS(rs, 1)];
Chris@82 464 TM = Rm[WS(rs, 1)];
Chris@82 465 TN = TL + TM;
Chris@82 466 T14 = TM - TL;
Chris@82 467 TH = Ip[WS(rs, 1)];
Chris@82 468 TI = Im[WS(rs, 1)];
Chris@82 469 TJ = TH - TI;
Chris@82 470 T16 = TH + TI;
Chris@82 471 }
Chris@82 472 }
/* Element 3 with W[10..11]; element 1 with W[4..5] (note T13 = W[5], T15 = W[4]). */
Chris@82 473 {
Chris@82 474 E To, Ts, T13, T15;
Chris@82 475 To = W[10];
Chris@82 476 Ts = W[11];
Chris@82 477 Tw = FNMS(Ts, Tv, To * Tr);
Chris@82 478 T1y = FMA(To, Tv, Ts * Tr);
Chris@82 479 T13 = W[5];
Chris@82 480 T15 = W[4];
Chris@82 481 T17 = FMA(T13, T14, T15 * T16);
Chris@82 482 T2g = FNMS(T13, T16, T15 * T14);
Chris@82 483 }
/* Elements 5 and 1, (Ip - Im, Rp + Rm) combined with W[18..19] and W[2..3]. */
Chris@82 484 {
Chris@82 485 E TF, T1z, TO, T1A;
Chris@82 486 {
Chris@82 487 E Tx, TB, TG, TK;
Chris@82 488 Tx = W[18];
Chris@82 489 TB = W[19];
Chris@82 490 TF = FNMS(TB, TE, Tx * TA);
Chris@82 491 T1z = FMA(Tx, TE, TB * TA);
Chris@82 492 TG = W[2];
Chris@82 493 TK = W[3];
Chris@82 494 TO = FNMS(TK, TN, TG * TJ);
Chris@82 495 T1A = FMA(TG, TN, TK * TJ);
Chris@82 496 }
Chris@82 497 TP = TF + TO;
Chris@82 498 T21 = KP433012701 * (T1A - T1z);
Chris@82 499 T1B = T1z + T1A;
Chris@82 500 T1J = KP433012701 * (TF - TO);
Chris@82 501 }
/* Elements 3 and 5, (Ip + Im, Rp - Rm) combined with W[12..13] and W[20..21]. */
Chris@82 502 {
Chris@82 503 E TW, T1O, T11, T1N;
Chris@82 504 {
Chris@82 505 E TS, TU, TX, TZ;
Chris@82 506 TS = W[12];
Chris@82 507 TU = W[13];
Chris@82 508 TW = FNMS(TU, TV, TS * TT);
Chris@82 509 T1O = FMA(TS, TV, TU * TT);
Chris@82 510 TX = W[20];
Chris@82 511 TZ = W[21];
Chris@82 512 T11 = FNMS(TZ, T10, TX * TY);
Chris@82 513 T1N = FMA(TX, T10, TZ * TY);
Chris@82 514 }
Chris@82 515 T12 = TW + T11;
Chris@82 516 T2u = KP433012701 * (T11 - TW);
Chris@82 517 T1P = KP433012701 * (T1N - T1O);
Chris@82 518 T2h = T1O + T1N;
Chris@82 519 }
Chris@82 520 }
/*
 * Store stage 1: eight outputs built only from plain sums/differences of
 * the twiddled values, each scaled by 1/2:
 * Rp[0], Ip[0], Rp[3], Ip[3], Rm[2], Im[2], Rm[5], Im[5] (indices via WS).
 */
Chris@82 521 {
Chris@82 522 E TR, T2f, T2m, T2o, T1s, T1E, T1D, T2n;
Chris@82 523 {
Chris@82 524 E Tn, TQ, T2i, T2l;
Chris@82 525 Tn = Tj + Tm;
Chris@82 526 TQ = Tw + TP;
Chris@82 527 TR = Tn - TQ;
Chris@82 528 T2f = TQ + Tn;
Chris@82 529 T2i = T2g - T2h;
Chris@82 530 T2l = T2j + T2k;
Chris@82 531 T2m = T2i - T2l;
Chris@82 532 T2o = T2i + T2l;
Chris@82 533 }
Chris@82 534 {
Chris@82 535 E T18, T1r, T1x, T1C;
Chris@82 536 T18 = T12 + T17;
Chris@82 537 T1r = T1d + T1q;
Chris@82 538 T1s = T18 - T1r;
Chris@82 539 T1E = T18 + T1r;
Chris@82 540 T1x = T1t + T1w;
Chris@82 541 T1C = T1y + T1B;
Chris@82 542 T1D = T1x + T1C;
Chris@82 543 T2n = T1x - T1C;
Chris@82 544 }
Chris@82 545 Ip[WS(rs, 3)] = KP500000000 * (TR + T1s);
Chris@82 546 Rp[WS(rs, 3)] = KP500000000 * (T2n - T2o);
Chris@82 547 Im[WS(rs, 2)] = KP500000000 * (T1s - TR);
Chris@82 548 Rm[WS(rs, 2)] = KP500000000 * (T2n + T2o);
Chris@82 549 Rm[WS(rs, 5)] = KP500000000 * (T1D - T1E);
Chris@82 550 Im[WS(rs, 5)] = KP500000000 * (T2m - T2f);
Chris@82 551 Rp[0] = KP500000000 * (T1D + T1E);
Chris@82 552 Ip[0] = KP500000000 * (T2f + T2m);
Chris@82 553 }
/*
 * Store stage 2: the remaining sixteen outputs.  Here the 1/2 scaling is
 * folded into the intermediates (KP500000000 * x combined with
 * KP250000000 * y, plus the KP433012701 terms from the load stages), so
 * the stores below are plain sums and differences with no final multiply.
 */
Chris@82 554 {
Chris@82 555 E T1H, T2b, T2s, T2B, T2v, T2A, T1K, T2c, T1Q, T29, T1Z, T25, T22, T26, T1V;
Chris@82 556 E T28;
Chris@82 557 {
Chris@82 558 E T1F, T2r, T2t, T1I;
Chris@82 559 T1F = FNMS(KP250000000, T1w, KP500000000 * T1t);
Chris@82 560 T1H = T1F - T1G;
Chris@82 561 T2b = T1F + T1G;
Chris@82 562 T2r = FNMS(KP500000000, T2j, KP250000000 * T2k);
Chris@82 563 T2s = T2q - T2r;
Chris@82 564 T2B = T2q + T2r;
Chris@82 565 T2t = FMA(KP250000000, T2h, KP500000000 * T2g);
Chris@82 566 T2v = T2t - T2u;
Chris@82 567 T2A = T2u + T2t;
Chris@82 568 T1I = FNMS(KP250000000, T1B, KP500000000 * T1y);
Chris@82 569 T1K = T1I - T1J;
Chris@82 570 T2c = T1I + T1J;
Chris@82 571 }
Chris@82 572 {
Chris@82 573 E T1M, T1X, T20, T1R;
Chris@82 574 T1M = FNMS(KP250000000, T12, KP500000000 * T17);
Chris@82 575 T1Q = T1M - T1P;
Chris@82 576 T29 = T1P + T1M;
Chris@82 577 T1X = FNMS(KP250000000, Tj, KP500000000 * Tm);
Chris@82 578 T1Z = T1X - T1Y;
Chris@82 579 T25 = T1Y + T1X;
Chris@82 580 T20 = FNMS(KP250000000, TP, KP500000000 * Tw);
Chris@82 581 T22 = T20 - T21;
Chris@82 582 T26 = T21 + T20;
Chris@82 583 T1R = FNMS(KP250000000, T1q, KP500000000 * T1d);
Chris@82 584 T1V = T1R - T1U;
Chris@82 585 T28 = T1R + T1U;
Chris@82 586 }
/* Stores to Rp[2], Rm[3], Ip[2], Im[3]. */
Chris@82 587 {
Chris@82 588 E T1L, T1W, T2p, T2w;
Chris@82 589 T1L = T1H + T1K;
Chris@82 590 T1W = T1Q + T1V;
Chris@82 591 Rp[WS(rs, 2)] = T1L - T1W;
Chris@82 592 Rm[WS(rs, 3)] = T1L + T1W;
Chris@82 593 T2p = T22 + T1Z;
Chris@82 594 T2w = T2s - T2v;
Chris@82 595 Ip[WS(rs, 2)] = T2p + T2w;
Chris@82 596 Im[WS(rs, 3)] = T2w - T2p;
Chris@82 597 }
/* Stores to Ip[5], Im[0], Rm[0], Rp[5]. */
Chris@82 598 {
Chris@82 599 E T23, T24, T2x, T2y;
Chris@82 600 T23 = T1Z - T22;
Chris@82 601 T24 = T1V - T1Q;
Chris@82 602 Ip[WS(rs, 5)] = T23 + T24;
Chris@82 603 Im[0] = T24 - T23;
Chris@82 604 T2x = T1H - T1K;
Chris@82 605 T2y = T2v + T2s;
Chris@82 606 Rm[0] = T2x - T2y;
Chris@82 607 Rp[WS(rs, 5)] = T2x + T2y;
Chris@82 608 }
/* Stores to Ip[1], Im[4], Rm[4], Rp[1]. */
Chris@82 609 {
Chris@82 610 E T27, T2a, T2z, T2C;
Chris@82 611 T27 = T25 - T26;
Chris@82 612 T2a = T28 - T29;
Chris@82 613 Ip[WS(rs, 1)] = T27 + T2a;
Chris@82 614 Im[WS(rs, 4)] = T2a - T27;
Chris@82 615 T2z = T2b - T2c;
Chris@82 616 T2C = T2A - T2B;
Chris@82 617 Rm[WS(rs, 4)] = T2z - T2C;
Chris@82 618 Rp[WS(rs, 1)] = T2z + T2C;
Chris@82 619 }
/* Stores to Rm[1], Rp[4], Ip[4], Im[1]. */
Chris@82 620 {
Chris@82 621 E T2d, T2e, T2D, T2E;
Chris@82 622 T2d = T2b + T2c;
Chris@82 623 T2e = T29 + T28;
Chris@82 624 Rm[WS(rs, 1)] = T2d - T2e;
Chris@82 625 Rp[WS(rs, 4)] = T2d + T2e;
Chris@82 626 T2D = T26 + T25;
Chris@82 627 T2E = T2A + T2B;
Chris@82 628 Ip[WS(rs, 4)] = T2D + T2E;
Chris@82 629 Im[WS(rs, 1)] = T2E - T2D;
Chris@82 630 }
Chris@82 631 }
Chris@82 632 }
Chris@82 633 }
Chris@82 634 }
Chris@82 635
/*
 * Twiddle instruction table referenced by desc (non-FMA branch): a
 * {TW_FULL, 1, 12} entry followed by a {TW_NEXT, 1, 0} entry.
 * NOTE(review): presumably TW_FULL requests the full twiddle set for size 12
 * and TW_NEXT ends the list -- confirm against the tw_instr definition,
 * which lives outside this file.
 */
Chris@82 636 static const tw_instr twinstr[] = {
Chris@82 637 {TW_FULL, 1, 12},
Chris@82 638 {TW_NEXT, 1, 0}
Chris@82 639 };
Chris@82 640
/*
 * Codelet descriptor (non-FMA branch): size 12, name string, the twiddle
 * table and &GENUS (GENUS is defined outside this file).  The trailing
 * {112, 46, 30, 0} repeats the "112 additions, 46 multiplications, 30 fused
 * multiply/add" figures from the generated comment for this branch; the
 * meaning of the fourth field is not visible here.
 */
Chris@82 641 static const hc2c_desc desc = { 12, "hc2cfdft_12", twinstr, &GENUS, {112, 46, 30, 0} };
Chris@82 642
/*
 * Registration entry point (non-FMA branch): hands the kernel hc2cfdft_12
 * and its descriptor to the planner p through X(khc2c_register), tagged
 * HC2C_VIA_DFT.  X() is a macro defined outside this file
 * (NOTE(review): presumably a symbol-prefixing macro -- confirm).
 */
Chris@82 643 void X(codelet_hc2cfdft_12) (planner *p) {
Chris@82 644 X(khc2c_register) (p, hc2cfdft_12, &desc, HC2C_VIA_DFT);
Chris@82 645 }
Chris@82 646 #endif