annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cfdft_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:11 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cfdft_10 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 122 FP additions, 92 FP multiplications,
Chris@82 32 * (or, 68 additions, 38 multiplications, 54 fused multiply/add),
Chris@82 33 * 81 stack variables, 5 constants, and 40 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cfdft_10, FMA variant (selected when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined). Machine-generated straight-line
 * kernel; the statement order comes from the generator's scheduler.
 *
 * Rp, Ip, Rm, Im: data arrays, read and then overwritten in place at
 *                 offsets WS(rs, 0) .. WS(rs, 4) (20 loads, 20 stores).
 * W:              twiddle table; 18 entries (W[0]..W[17]) are consumed per
 *                 iteration, starting at offset (mb - 1) * 18.
 * rs:             stride handed to WS() to address elements within a pass.
 * mb, me:         the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms:             per-iteration step; Rp and Ip advance by +ms while Rm and
 *                 Im advance by -ms.
 *
 * Within one iteration every input is loaded into a local before the first
 * store, so writing results back to the same locations is safe.
 */
Chris@82 37 static void hc2cfdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numerically: KP951056516 = sin(2*pi/5), KP559016994 = sqrt(5)/4,
 * KP618033988 = (sqrt(5) - 1)/2. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 42 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 43 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 44 {
Chris@82 45 INT m;
Chris@82 46 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
/* Intermediates that survive the load stage and feed the four output groups. */
Chris@82 47 E T3, T1u, Td, T1w, T1S, T2f, T14, T1p, T1j, T1q, T1N, T2e, TQ, T2i, T1n;
Chris@82 48 E T1H, Tz, T2h, T1m, T1C;
/* Load stage: read all 20 inputs, form their sums and differences, and
 * combine them with the twiddle entries W[0]..W[17]. */
Chris@82 49 {
Chris@82 50 E T1, T2, T1h, Tc, TW, T1c, T1d, T1b, T1f, T1g, T1Q, T7, TV, T1J, TS;
Chris@82 51 E TU, Ts, Tx, T19, T18, T1O, T15, T17, Tt, T1A, Ti, Tn, TE, TD, T1F;
Chris@82 52 E TA, TC, Tj, T1y, TJ, TO, T12, T11, T1L, TY, T10, TK, T1D;
Chris@82 53 {
Chris@82 54 E Ta, Tb, T1e, T5, T6, TT;
/* Elements 0 and 2, with the first products against W[0], W[1], W[6], W[7]. */
Chris@82 55 T1 = Ip[0];
Chris@82 56 T2 = Im[0];
Chris@82 57 T1h = T1 + T2;
Chris@82 58 Ta = Rp[WS(rs, 2)];
Chris@82 59 Tb = Rm[WS(rs, 2)];
Chris@82 60 Tc = Ta - Tb;
Chris@82 61 TW = Ta + Tb;
Chris@82 62 T1c = Rm[0];
Chris@82 63 T1d = Rp[0];
Chris@82 64 T1e = T1c - T1d;
Chris@82 65 T1b = W[0];
Chris@82 66 T1f = T1b * T1e;
Chris@82 67 T1g = W[1];
Chris@82 68 T1Q = T1g * T1e;
Chris@82 69 T5 = Ip[WS(rs, 2)];
Chris@82 70 T6 = Im[WS(rs, 2)];
Chris@82 71 TT = T5 - T6;
Chris@82 72 T7 = T5 + T6;
Chris@82 73 TV = W[7];
Chris@82 74 T1J = TV * TT;
Chris@82 75 TS = W[6];
Chris@82 76 TU = TS * TT;
/* Element 3, with W[10], W[11], W[12]. */
Chris@82 77 {
Chris@82 78 E Tq, Tr, T16, Tv, Tw, Tp;
Chris@82 79 Tq = Rm[WS(rs, 3)];
Chris@82 80 Tr = Rp[WS(rs, 3)];
Chris@82 81 Ts = Tq - Tr;
Chris@82 82 Tv = Ip[WS(rs, 3)];
Chris@82 83 Tw = Im[WS(rs, 3)];
Chris@82 84 Tx = Tv + Tw;
Chris@82 85 T16 = Tv - Tw;
Chris@82 86 T19 = Tr + Tq;
Chris@82 87 T18 = W[11];
Chris@82 88 T1O = T18 * T16;
Chris@82 89 T15 = W[10];
Chris@82 90 T17 = T15 * T16;
Chris@82 91 Tp = W[12];
Chris@82 92 Tt = Tp * Ts;
Chris@82 93 T1A = Tp * Tx;
Chris@82 94 }
/* Element 1, with W[2], W[4], W[5]. */
Chris@82 95 {
Chris@82 96 E Tg, Th, TB, Tl, Tm, Tf;
Chris@82 97 Tg = Ip[WS(rs, 1)];
Chris@82 98 Th = Im[WS(rs, 1)];
Chris@82 99 Ti = Tg - Th;
Chris@82 100 Tl = Rp[WS(rs, 1)];
Chris@82 101 Tm = Rm[WS(rs, 1)];
Chris@82 102 Tn = Tl + Tm;
Chris@82 103 TB = Tm - Tl;
Chris@82 104 TE = Tg + Th;
Chris@82 105 TD = W[5];
Chris@82 106 T1F = TD * TB;
Chris@82 107 TA = W[4];
Chris@82 108 TC = TA * TB;
Chris@82 109 Tf = W[2];
Chris@82 110 Tj = Tf * Ti;
Chris@82 111 T1y = Tf * Tn;
Chris@82 112 }
/* Element 4, with W[14], W[16], W[17]. */
Chris@82 113 {
Chris@82 114 E TH, TI, TZ, TM, TN, TG;
Chris@82 115 TH = Ip[WS(rs, 4)];
Chris@82 116 TI = Im[WS(rs, 4)];
Chris@82 117 TJ = TH - TI;
Chris@82 118 TM = Rp[WS(rs, 4)];
Chris@82 119 TN = Rm[WS(rs, 4)];
Chris@82 120 TO = TM + TN;
Chris@82 121 TZ = TN - TM;
Chris@82 122 T12 = TH + TI;
Chris@82 123 T11 = W[17];
Chris@82 124 T1L = T11 * TZ;
Chris@82 125 TY = W[16];
Chris@82 126 T10 = TY * TZ;
Chris@82 127 TG = W[14];
Chris@82 128 TK = TG * TJ;
Chris@82 129 T1D = TG * TO;
Chris@82 130 }
Chris@82 131 }
/* Finish the twiddle products (remaining entries W[3], W[8], W[9], W[13],
 * W[15]) and pair the results into the sums/differences used below. */
Chris@82 132 {
Chris@82 133 E T1P, T1R, T1K, T1M;
Chris@82 134 T3 = T1 - T2;
Chris@82 135 T1u = T1d + T1c;
Chris@82 136 {
Chris@82 137 E T4, T8, T9, T1v;
Chris@82 138 T4 = W[9];
Chris@82 139 T8 = T4 * T7;
Chris@82 140 T9 = W[8];
Chris@82 141 T1v = T9 * T7;
Chris@82 142 Td = FMA(T9, Tc, T8);
Chris@82 143 T1w = FNMS(T4, Tc, T1v);
Chris@82 144 }
Chris@82 145 T1P = FMA(T15, T19, T1O);
Chris@82 146 T1R = FMA(T1b, T1h, T1Q);
Chris@82 147 T1S = T1P - T1R;
Chris@82 148 T2f = T1P + T1R;
Chris@82 149 {
Chris@82 150 E TX, T13, T1a, T1i;
Chris@82 151 TX = FNMS(TV, TW, TU);
Chris@82 152 T13 = FNMS(T11, T12, T10);
Chris@82 153 T14 = TX + T13;
Chris@82 154 T1p = T13 - TX;
Chris@82 155 T1a = FNMS(T18, T19, T17);
Chris@82 156 T1i = FNMS(T1g, T1h, T1f);
Chris@82 157 T1j = T1a + T1i;
Chris@82 158 T1q = T1i - T1a;
Chris@82 159 }
Chris@82 160 T1K = FMA(TS, TW, T1J);
Chris@82 161 T1M = FMA(TY, T12, T1L);
Chris@82 162 T1N = T1K - T1M;
Chris@82 163 T2e = T1K + T1M;
Chris@82 164 {
Chris@82 165 E TF, T1G, TP, T1E, TL;
Chris@82 166 TF = FNMS(TD, TE, TC);
Chris@82 167 T1G = FMA(TA, TE, T1F);
Chris@82 168 TL = W[15];
Chris@82 169 TP = FNMS(TL, TO, TK);
Chris@82 170 T1E = FMA(TL, TJ, T1D);
Chris@82 171 TQ = TF + TP;
Chris@82 172 T2i = T1G + T1E;
Chris@82 173 T1n = TF - TP;
Chris@82 174 T1H = T1E - T1G;
Chris@82 175 }
Chris@82 176 {
Chris@82 177 E To, T1z, Ty, T1B, Tk, Tu;
Chris@82 178 Tk = W[3];
Chris@82 179 To = FNMS(Tk, Tn, Tj);
Chris@82 180 T1z = FMA(Tk, Ti, T1y);
Chris@82 181 Tu = W[13];
Chris@82 182 Ty = FNMS(Tu, Tx, Tt);
Chris@82 183 T1B = FMA(Tu, Ts, T1A);
Chris@82 184 Tz = To + Ty;
Chris@82 185 T2h = T1z + T1B;
Chris@82 186 T1m = Ty - To;
Chris@82 187 T1C = T1z - T1B;
Chris@82 188 }
Chris@82 189 }
Chris@82 190 }
/* Output group 1: stores Ip[0], Ip[4], Im[3], Ip[2], Im[1]. */
Chris@82 191 {
Chris@82 192 E T2k, T2m, Te, T1l, T2b, T2c, T2l, T2d;
Chris@82 193 {
Chris@82 194 E T2g, T2j, TR, T1k;
Chris@82 195 T2g = T2e - T2f;
Chris@82 196 T2j = T2h - T2i;
Chris@82 197 T2k = FNMS(KP618033988, T2j, T2g);
Chris@82 198 T2m = FMA(KP618033988, T2g, T2j);
Chris@82 199 Te = T3 - Td;
Chris@82 200 TR = Tz + TQ;
Chris@82 201 T1k = T14 + T1j;
Chris@82 202 T1l = TR + T1k;
Chris@82 203 T2b = FNMS(KP250000000, T1l, Te);
Chris@82 204 T2c = TR - T1k;
Chris@82 205 }
Chris@82 206 Ip[0] = KP500000000 * (Te + T1l);
Chris@82 207 T2l = FMA(KP559016994, T2c, T2b);
Chris@82 208 Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T2m, T2l));
Chris@82 209 Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP951056516, T2m, T2l)));
Chris@82 210 T2d = FNMS(KP559016994, T2c, T2b);
Chris@82 211 Ip[WS(rs, 2)] = KP500000000 * (FMA(KP951056516, T2k, T2d));
Chris@82 212 Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T2k, T2d)));
Chris@82 213 }
/* Output group 2: stores Rp[0], Rp[4], Rm[3], Rp[2], Rm[1]. */
Chris@82 214 {
Chris@82 215 E T2w, T2y, T2n, T2q, T2r, T2s, T2x, T2t;
Chris@82 216 {
Chris@82 217 E T2u, T2v, T2o, T2p;
Chris@82 218 T2u = T14 - T1j;
Chris@82 219 T2v = Tz - TQ;
Chris@82 220 T2w = FNMS(KP618033988, T2v, T2u);
Chris@82 221 T2y = FMA(KP618033988, T2u, T2v);
Chris@82 222 T2n = T1u + T1w;
Chris@82 223 T2o = T2h + T2i;
Chris@82 224 T2p = T2e + T2f;
Chris@82 225 T2q = T2o + T2p;
Chris@82 226 T2r = FNMS(KP250000000, T2q, T2n);
Chris@82 227 T2s = T2o - T2p;
Chris@82 228 }
Chris@82 229 Rp[0] = KP500000000 * (T2n + T2q);
Chris@82 230 T2x = FMA(KP559016994, T2s, T2r);
Chris@82 231 Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T2y, T2x));
Chris@82 232 Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T2y, T2x));
Chris@82 233 T2t = FNMS(KP559016994, T2s, T2r);
Chris@82 234 Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T2w, T2t));
Chris@82 235 Rm[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T2w, T2t));
Chris@82 236 }
/* Output group 3: stores Im[4], Ip[3], Im[2], Ip[1], Im[0]. */
Chris@82 237 {
Chris@82 238 E T28, T2a, T1t, T1s, T23, T24, T29, T25;
Chris@82 239 {
Chris@82 240 E T26, T27, T1o, T1r;
Chris@82 241 T26 = T1H - T1C;
Chris@82 242 T27 = T1S - T1N;
Chris@82 243 T28 = FMA(KP618033988, T27, T26);
Chris@82 244 T2a = FNMS(KP618033988, T26, T27);
Chris@82 245 T1t = Td + T3;
Chris@82 246 T1o = T1m + T1n;
Chris@82 247 T1r = T1p + T1q;
Chris@82 248 T1s = T1o + T1r;
Chris@82 249 T23 = FMA(KP250000000, T1s, T1t);
Chris@82 250 T24 = T1r - T1o;
Chris@82 251 }
Chris@82 252 Im[WS(rs, 4)] = KP500000000 * (T1s - T1t);
Chris@82 253 T29 = FNMS(KP559016994, T24, T23);
Chris@82 254 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T2a, T29));
Chris@82 255 Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP951056516, T2a, T29)));
Chris@82 256 T25 = FMA(KP559016994, T24, T23);
Chris@82 257 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T28, T25));
Chris@82 258 Im[0] = -(KP500000000 * (FNMS(KP951056516, T28, T25)));
Chris@82 259 }
/* Output group 4: stores Rm[4], Rp[3], Rm[2], Rp[1], Rm[0]. */
Chris@82 260 {
Chris@82 261 E T20, T22, T1x, T1U, T1V, T1W, T21, T1X;
Chris@82 262 {
Chris@82 263 E T1Y, T1Z, T1I, T1T;
Chris@82 264 T1Y = T1n - T1m;
Chris@82 265 T1Z = T1q - T1p;
Chris@82 266 T20 = FMA(KP618033988, T1Z, T1Y);
Chris@82 267 T22 = FNMS(KP618033988, T1Y, T1Z);
Chris@82 268 T1x = T1u - T1w;
Chris@82 269 T1I = T1C + T1H;
Chris@82 270 T1T = T1N + T1S;
Chris@82 271 T1U = T1I + T1T;
Chris@82 272 T1V = FNMS(KP250000000, T1U, T1x);
Chris@82 273 T1W = T1I - T1T;
Chris@82 274 }
Chris@82 275 Rm[WS(rs, 4)] = KP500000000 * (T1x + T1U);
Chris@82 276 T21 = FNMS(KP559016994, T1W, T1V);
Chris@82 277 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T22, T21));
Chris@82 278 Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T22, T21));
Chris@82 279 T1X = FMA(KP559016994, T1W, T1V);
Chris@82 280 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T20, T1X));
Chris@82 281 Rm[0] = KP500000000 * (FNMS(KP951056516, T20, T1X));
Chris@82 282 }
Chris@82 283 }
Chris@82 284 }
Chris@82 285 }
Chris@82 286
/* Twiddle-factor instruction list referenced by desc for this codelet.
 * NOTE(review): presumably TW_FULL with size 10 requests the complete
 * twiddle set (the kernel consumes 18 W entries per iteration) and TW_NEXT
 * terminates the list -- confirm against the tw_instr definition. */
Chris@82 287 static const tw_instr twinstr[] = {
Chris@82 288 {TW_FULL, 1, 10},
Chris@82 289 {TW_NEXT, 1, 0}
Chris@82 290 };
Chris@82 291
/* Descriptor passed to the planner at registration: size 10, the codelet
 * name, its twiddle instruction list, the genus, and the operation counts
 * {68, 38, 54, 0}. The first three counts match the "68 additions, 38
 * multiplications, 54 fused multiply/add" quoted in the generated header
 * comment of the FMA variant. NOTE(review): field meanings are inferred
 * from the values; confirm against the hc2c_desc definition. */
Chris@82 292 static const hc2c_desc desc = { 10, "hc2cfdft_10", twinstr, &GENUS, {68, 38, 54, 0} };
Chris@82 293
/* Registration entry point: hands the FMA-variant kernel hc2cfdft_10 and
 * its descriptor to planner p via X(khc2c_register), tagged HC2C_VIA_DFT. */
Chris@82 294 void X(codelet_hc2cfdft_10) (planner *p) {
Chris@82 295 X(khc2c_register) (p, hc2cfdft_10, &desc, HC2C_VIA_DFT);
Chris@82 296 }
Chris@82 297 #else
Chris@82 298
Chris@82 299 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cfdft_10 -include rdft/scalar/hc2cf.h */
Chris@82 300
Chris@82 301 /*
Chris@82 302 * This function contains 122 FP additions, 68 FP multiplications,
Chris@82 303 * (or, 92 additions, 38 multiplications, 30 fused multiply/add),
Chris@82 304 * 62 stack variables, 5 constants, and 40 memory accesses
Chris@82 305 */
Chris@82 306 #include "rdft/scalar/hc2cf.h"
Chris@82 307
/*
 * hc2cfdft_10, non-FMA variant (selected when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined). Machine-generated straight-line
 * kernel; the statement order comes from the generator's scheduler.
 *
 * Rp, Ip, Rm, Im: data arrays, read and then overwritten in place at
 *                 offsets WS(rs, 0) .. WS(rs, 4) (20 loads, 20 stores).
 * W:              twiddle table; 18 entries (W[0]..W[17]) are consumed per
 *                 iteration, starting at offset (mb - 1) * 18.
 * rs:             stride handed to WS() to address elements within a pass.
 * mb, me:         the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms:             per-iteration step; Rp and Ip advance by +ms while Rm and
 *                 Im advance by -ms.
 *
 * Within one iteration every input is loaded into a local before the first
 * store, so writing results back to the same locations is safe.
 */
Chris@82 308 static void hc2cfdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 309 {
/* Numerically: KP293892626 = sin(pi/5)/2, KP475528258 = sin(2*pi/5)/2,
 * KP279508497 = sqrt(5)/8. */
Chris@82 310 DK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@82 311 DK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@82 312 DK(KP125000000, +0.125000000000000000000000000000000000000000000);
Chris@82 313 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 314 DK(KP279508497, +0.279508497187473712051146708591409529430077295);
Chris@82 315 {
Chris@82 316 INT m;
Chris@82 317 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
/* Intermediates that survive the load stage and feed the four output groups. */
Chris@82 318 E Tw, TL, TM, T1W, T1X, T27, T1Z, T20, T26, TX, T1a, T1b, T1d, T1e, T1f;
Chris@82 319 E T1q, T1t, T1u, T1x, T1A, T1B, T1g, T1h, T1i, Td, T25, T1k, T1F;
/* Load stage: read all 20 inputs, form their sums and differences, and
 * combine them with the twiddle entries W[0]..W[17]. */
Chris@82 320 {
Chris@82 321 E T3, T1D, T19, T1z, T7, Tb, TR, T1v, Tm, T1o, TK, T1s, Tv, T1p, T12;
Chris@82 322 E T1y, TF, T1r, TW, T1w;
/* Element 0, with W[0], W[1]. */
Chris@82 323 {
Chris@82 324 E T1, T2, T18, T14, T15, T16, T13, T17;
Chris@82 325 T1 = Ip[0];
Chris@82 326 T2 = Im[0];
Chris@82 327 T18 = T1 + T2;
Chris@82 328 T14 = Rm[0];
Chris@82 329 T15 = Rp[0];
Chris@82 330 T16 = T14 - T15;
Chris@82 331 T3 = T1 - T2;
Chris@82 332 T1D = T15 + T14;
Chris@82 333 T13 = W[0];
Chris@82 334 T17 = W[1];
Chris@82 335 T19 = FNMS(T17, T18, T13 * T16);
Chris@82 336 T1z = FMA(T17, T16, T13 * T18);
Chris@82 337 }
/* Element 2, with W[6], W[7] (T7 and Tb are combined with W[8], W[9] later). */
Chris@82 338 {
Chris@82 339 E T5, T6, TO, T9, Ta, TQ, TN, TP;
Chris@82 340 T5 = Ip[WS(rs, 2)];
Chris@82 341 T6 = Im[WS(rs, 2)];
Chris@82 342 TO = T5 - T6;
Chris@82 343 T9 = Rp[WS(rs, 2)];
Chris@82 344 Ta = Rm[WS(rs, 2)];
Chris@82 345 TQ = T9 + Ta;
Chris@82 346 T7 = T5 + T6;
Chris@82 347 Tb = T9 - Ta;
Chris@82 348 TN = W[6];
Chris@82 349 TP = W[7];
Chris@82 350 TR = FNMS(TP, TQ, TN * TO);
Chris@82 351 T1v = FMA(TP, TO, TN * TQ);
Chris@82 352 }
/* Element 1, with W[2]..W[5]. */
Chris@82 353 {
Chris@82 354 E Th, TJ, Tl, TH;
Chris@82 355 {
Chris@82 356 E Tf, Tg, Tj, Tk;
Chris@82 357 Tf = Ip[WS(rs, 1)];
Chris@82 358 Tg = Im[WS(rs, 1)];
Chris@82 359 Th = Tf - Tg;
Chris@82 360 TJ = Tf + Tg;
Chris@82 361 Tj = Rp[WS(rs, 1)];
Chris@82 362 Tk = Rm[WS(rs, 1)];
Chris@82 363 Tl = Tj + Tk;
Chris@82 364 TH = Tj - Tk;
Chris@82 365 }
Chris@82 366 {
Chris@82 367 E Te, Ti, TG, TI;
Chris@82 368 Te = W[2];
Chris@82 369 Ti = W[3];
Chris@82 370 Tm = FNMS(Ti, Tl, Te * Th);
Chris@82 371 T1o = FMA(Te, Tl, Ti * Th);
Chris@82 372 TG = W[4];
Chris@82 373 TI = W[5];
Chris@82 374 TK = FMA(TG, TH, TI * TJ);
Chris@82 375 T1s = FNMS(TI, TH, TG * TJ);
Chris@82 376 }
Chris@82 377 }
/* Element 3, with W[10]..W[13]. */
Chris@82 378 {
Chris@82 379 E Tq, TZ, Tu, T11;
Chris@82 380 {
Chris@82 381 E To, Tp, Ts, Tt;
Chris@82 382 To = Ip[WS(rs, 3)];
Chris@82 383 Tp = Im[WS(rs, 3)];
Chris@82 384 Tq = To + Tp;
Chris@82 385 TZ = To - Tp;
Chris@82 386 Ts = Rp[WS(rs, 3)];
Chris@82 387 Tt = Rm[WS(rs, 3)];
Chris@82 388 Tu = Ts - Tt;
Chris@82 389 T11 = Ts + Tt;
Chris@82 390 }
Chris@82 391 {
Chris@82 392 E Tn, Tr, TY, T10;
Chris@82 393 Tn = W[13];
Chris@82 394 Tr = W[12];
Chris@82 395 Tv = FMA(Tn, Tq, Tr * Tu);
Chris@82 396 T1p = FNMS(Tn, Tu, Tr * Tq);
Chris@82 397 TY = W[10];
Chris@82 398 T10 = W[11];
Chris@82 399 T12 = FNMS(T10, T11, TY * TZ);
Chris@82 400 T1y = FMA(T10, TZ, TY * T11);
Chris@82 401 }
Chris@82 402 }
/* Element 4, with W[14]..W[17]. */
Chris@82 403 {
Chris@82 404 E TA, TV, TE, TT;
Chris@82 405 {
Chris@82 406 E Ty, Tz, TC, TD;
Chris@82 407 Ty = Ip[WS(rs, 4)];
Chris@82 408 Tz = Im[WS(rs, 4)];
Chris@82 409 TA = Ty - Tz;
Chris@82 410 TV = Ty + Tz;
Chris@82 411 TC = Rp[WS(rs, 4)];
Chris@82 412 TD = Rm[WS(rs, 4)];
Chris@82 413 TE = TC + TD;
Chris@82 414 TT = TC - TD;
Chris@82 415 }
Chris@82 416 {
Chris@82 417 E Tx, TB, TS, TU;
Chris@82 418 Tx = W[14];
Chris@82 419 TB = W[15];
Chris@82 420 TF = FNMS(TB, TE, Tx * TA);
Chris@82 421 T1r = FMA(Tx, TE, TB * TA);
Chris@82 422 TS = W[16];
Chris@82 423 TU = W[17];
Chris@82 424 TW = FMA(TS, TT, TU * TV);
Chris@82 425 T1w = FNMS(TU, TT, TS * TV);
Chris@82 426 }
Chris@82 427 }
/* Pair the twiddled terms into the sums/differences used by the output groups. */
Chris@82 428 Tw = Tm - Tv;
Chris@82 429 TL = TF - TK;
Chris@82 430 TM = Tw + TL;
Chris@82 431 T1W = T1v + T1w;
Chris@82 432 T1X = T1y + T1z;
Chris@82 433 T27 = T1W + T1X;
Chris@82 434 T1Z = T1o + T1p;
Chris@82 435 T20 = T1s + T1r;
Chris@82 436 T26 = T1Z + T20;
Chris@82 437 TX = TR - TW;
Chris@82 438 T1a = T12 + T19;
Chris@82 439 T1b = TX + T1a;
Chris@82 440 T1d = T19 - T12;
Chris@82 441 T1e = TR + TW;
Chris@82 442 T1f = T1d - T1e;
Chris@82 443 T1q = T1o - T1p;
Chris@82 444 T1t = T1r - T1s;
Chris@82 445 T1u = T1q + T1t;
Chris@82 446 T1x = T1v - T1w;
Chris@82 447 T1A = T1y - T1z;
Chris@82 448 T1B = T1x + T1A;
Chris@82 449 T1g = Tm + Tv;
Chris@82 450 T1h = TK + TF;
Chris@82 451 T1i = T1g + T1h;
/* Remaining twiddle pair W[8], W[9] applied to the element-2 terms T7 and Tb. */
Chris@82 452 {
Chris@82 453 E Tc, T1E, T4, T8;
Chris@82 454 T4 = W[9];
Chris@82 455 T8 = W[8];
Chris@82 456 Tc = FMA(T4, T7, T8 * Tb);
Chris@82 457 T1E = FNMS(T4, Tb, T8 * T7);
Chris@82 458 Td = T3 - Tc;
Chris@82 459 T25 = T1D + T1E;
Chris@82 460 T1k = Tc + T3;
Chris@82 461 T1F = T1D - T1E;
Chris@82 462 }
Chris@82 463 }
/* Output group 1: stores Ip[0], Ip[4], Im[3], Ip[2], Im[1]. */
Chris@82 464 {
Chris@82 465 E T1U, T1c, T1T, T22, T24, T1Y, T21, T23, T1V;
Chris@82 466 T1U = KP279508497 * (TM - T1b);
Chris@82 467 T1c = TM + T1b;
Chris@82 468 T1T = FNMS(KP125000000, T1c, KP500000000 * Td);
Chris@82 469 T1Y = T1W - T1X;
Chris@82 470 T21 = T1Z - T20;
Chris@82 471 T22 = FNMS(KP293892626, T21, KP475528258 * T1Y);
Chris@82 472 T24 = FMA(KP475528258, T21, KP293892626 * T1Y);
Chris@82 473 Ip[0] = KP500000000 * (Td + T1c);
Chris@82 474 T23 = T1U + T1T;
Chris@82 475 Ip[WS(rs, 4)] = T23 + T24;
Chris@82 476 Im[WS(rs, 3)] = T24 - T23;
Chris@82 477 T1V = T1T - T1U;
Chris@82 478 Ip[WS(rs, 2)] = T1V + T22;
Chris@82 479 Im[WS(rs, 1)] = T22 - T1V;
Chris@82 480 }
/* Output group 2: stores Rp[0], Rp[4], Rm[3], Rp[2], Rm[1]. */
Chris@82 481 {
Chris@82 482 E T2a, T28, T29, T2e, T2g, T2c, T2d, T2f, T2b;
Chris@82 483 T2a = KP279508497 * (T26 - T27);
Chris@82 484 T28 = T26 + T27;
Chris@82 485 T29 = FNMS(KP125000000, T28, KP500000000 * T25);
Chris@82 486 T2c = TX - T1a;
Chris@82 487 T2d = Tw - TL;
Chris@82 488 T2e = FNMS(KP293892626, T2d, KP475528258 * T2c);
Chris@82 489 T2g = FMA(KP475528258, T2d, KP293892626 * T2c);
Chris@82 490 Rp[0] = KP500000000 * (T25 + T28);
Chris@82 491 T2f = T2a + T29;
Chris@82 492 Rp[WS(rs, 4)] = T2f - T2g;
Chris@82 493 Rm[WS(rs, 3)] = T2g + T2f;
Chris@82 494 T2b = T29 - T2a;
Chris@82 495 Rp[WS(rs, 2)] = T2b - T2e;
Chris@82 496 Rm[WS(rs, 1)] = T2e + T2b;
Chris@82 497 }
/* Output group 3: stores Im[4], Ip[3], Im[2], Ip[1], Im[0]. */
Chris@82 498 {
Chris@82 499 E T1M, T1j, T1L, T1Q, T1S, T1O, T1P, T1R, T1N;
Chris@82 500 T1M = KP279508497 * (T1i + T1f);
Chris@82 501 T1j = T1f - T1i;
Chris@82 502 T1L = FMA(KP500000000, T1k, KP125000000 * T1j);
Chris@82 503 T1O = T1A - T1x;
Chris@82 504 T1P = T1q - T1t;
Chris@82 505 T1Q = FNMS(KP475528258, T1P, KP293892626 * T1O);
Chris@82 506 T1S = FMA(KP293892626, T1P, KP475528258 * T1O);
Chris@82 507 Im[WS(rs, 4)] = KP500000000 * (T1j - T1k);
Chris@82 508 T1R = T1L - T1M;
Chris@82 509 Ip[WS(rs, 3)] = T1R + T1S;
Chris@82 510 Im[WS(rs, 2)] = T1S - T1R;
Chris@82 511 T1N = T1L + T1M;
Chris@82 512 Ip[WS(rs, 1)] = T1N + T1Q;
Chris@82 513 Im[0] = T1Q - T1N;
Chris@82 514 }
/* Output group 4: stores Rm[4], Rp[3], Rm[2], Rp[1], Rm[0]. */
Chris@82 515 {
Chris@82 516 E T1C, T1G, T1H, T1n, T1J, T1l, T1m, T1K, T1I;
Chris@82 517 T1C = KP279508497 * (T1u - T1B);
Chris@82 518 T1G = T1u + T1B;
Chris@82 519 T1H = FNMS(KP125000000, T1G, KP500000000 * T1F);
Chris@82 520 T1l = T1g - T1h;
Chris@82 521 T1m = T1e + T1d;
Chris@82 522 T1n = FMA(KP475528258, T1l, KP293892626 * T1m);
Chris@82 523 T1J = FNMS(KP293892626, T1l, KP475528258 * T1m);
Chris@82 524 Rm[WS(rs, 4)] = KP500000000 * (T1F + T1G);
Chris@82 525 T1K = T1H - T1C;
Chris@82 526 Rp[WS(rs, 3)] = T1J + T1K;
Chris@82 527 Rm[WS(rs, 2)] = T1K - T1J;
Chris@82 528 T1I = T1C + T1H;
Chris@82 529 Rp[WS(rs, 1)] = T1n + T1I;
Chris@82 530 Rm[0] = T1I - T1n;
Chris@82 531 }
Chris@82 532 }
Chris@82 533 }
Chris@82 534 }
Chris@82 535
/* Twiddle-factor instruction list referenced by desc for this codelet.
 * NOTE(review): presumably TW_FULL with size 10 requests the complete
 * twiddle set (the kernel consumes 18 W entries per iteration) and TW_NEXT
 * terminates the list -- confirm against the tw_instr definition. */
Chris@82 536 static const tw_instr twinstr[] = {
Chris@82 537 {TW_FULL, 1, 10},
Chris@82 538 {TW_NEXT, 1, 0}
Chris@82 539 };
Chris@82 540
/* Descriptor passed to the planner at registration: size 10, the codelet
 * name, its twiddle instruction list, the genus, and the operation counts
 * {92, 38, 30, 0}. The first three counts match the "92 additions, 38
 * multiplications, 30 fused multiply/add" quoted in the generated header
 * comment of the non-FMA variant. NOTE(review): field meanings are inferred
 * from the values; confirm against the hc2c_desc definition. */
Chris@82 541 static const hc2c_desc desc = { 10, "hc2cfdft_10", twinstr, &GENUS, {92, 38, 30, 0} };
Chris@82 542
/* Registration entry point: hands the non-FMA kernel hc2cfdft_10 and its
 * descriptor to planner p via X(khc2c_register), tagged HC2C_VIA_DFT. */
Chris@82 543 void X(codelet_hc2cfdft_10) (planner *p) {
Chris@82 544 X(khc2c_register) (p, hc2cfdft_10, &desc, HC2C_VIA_DFT);
Chris@82 545 }
Chris@82 546 #endif