annotate src/fftw-3.3.5/rdft/scalar/r2cf/hc2cf_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:48:06 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cf_10 -include hc2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@42 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@42 33 * 70 stack variables, 4 constants, and 40 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cf.h"
Chris@42 36
/*
 * hc2cf_10, HAVE_FMA build: genfft-generated kernel (gen_hc2c, -n 10 -dit,
 * per the "Generated by" line above).
 *
 * Rp, Ip, Rm, Im  data arrays, read and then overwritten in place; every
 *                 iteration touches elements 0 and WS(rs, 1..4) of each.
 * W               twiddle table; W[0]..W[17] are read per iteration, after the
 *                 pointer is first offset by (mb - 1) * 18.
 * rs              stride handed to WS() when indexing the four arrays.
 * mb, me          half-open range [mb, me) of the loop index m.
 * ms              per-iteration step: Rp and Ip advance by +ms, Rm and Im by -ms.
 *
 * All 20 data loads happen before the first store, so the in-place update
 * never reads a value written earlier in the same iteration.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, MAKE_VOLATILE_STRIDE and the
 * FMA/FNMS/FMS macros come from project headers not visible here; presumably
 * FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b, FMS(a,b,c) = a*b - c -- confirm.
 */
Chris@42 37 static void hc2cf_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numerically: sin(2*pi/5), sqrt(5)/4, 1/4 and (sqrt(5) - 1)/2. */
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 43 {
Chris@42 44 INT m;
/* One pass per m; the pointer and twiddle updates all live in the for-header. */
Chris@42 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
/* These four outlive the inner scope: they feed the final Ip/Im stores at the end of the loop body. */
Chris@42 46 E T1X, T21, T20, T22;
Chris@42 47 {
Chris@42 48 E T26, T1U, T8, T12, T1n, T1P, T24, T1K, T1Y, T18, T10, T2b, T1H, T23, T15;
Chris@42 49 E T1Z, T2a, Tz, T1O, T1y;
Chris@42 50 {
Chris@42 51 E T1, T1T, T3, T6, T2, T5;
/* Rp[0] and Rm[0] are used without a twiddle; Ip[2]/Im[2] are paired with W[8], W[9]. */
Chris@42 52 T1 = Rp[0];
Chris@42 53 T1T = Rm[0];
Chris@42 54 T3 = Ip[WS(rs, 2)];
Chris@42 55 T6 = Im[WS(rs, 2)];
Chris@42 56 T2 = W[8];
Chris@42 57 T5 = W[9];
Chris@42 58 {
Chris@42 59 E T1l, TY, T1h, T1J, TM, T16, T1j, TS;
Chris@42 60 {
Chris@42 61 E TF, T1e, TO, TR, T1g, TL, TN, TQ, T1i, TP;
Chris@42 62 {
Chris@42 63 E TU, TX, TT, TW;
Chris@42 64 {
Chris@42 65 E TB, TE, T1R, T4, TA, TD;
/* Rp[2]/Rm[2] with twiddles W[6], W[7]. */
Chris@42 66 TB = Rp[WS(rs, 2)];
Chris@42 67 TE = Rm[WS(rs, 2)];
Chris@42 68 T1R = T2 * T6;
Chris@42 69 T4 = T2 * T3;
Chris@42 70 TA = W[6];
Chris@42 71 TD = W[7];
Chris@42 72 {
Chris@42 73 E T1S, T7, T1d, TC;
Chris@42 74 T1S = FNMS(T5, T3, T1R);
Chris@42 75 T7 = FMA(T5, T6, T4);
Chris@42 76 T1d = TA * TE;
Chris@42 77 TC = TA * TB;
Chris@42 78 T26 = T1T - T1S;
Chris@42 79 T1U = T1S + T1T;
Chris@42 80 T8 = T1 - T7;
Chris@42 81 T12 = T1 + T7;
Chris@42 82 TF = FMA(TD, TE, TC);
Chris@42 83 T1e = FNMS(TD, TB, T1d);
Chris@42 84 }
Chris@42 85 }
/* Ip[0]/Im[0] with W[0], W[1]; Ip[4]/Im[4] with W[16], W[17]; Rp[3]/Rm[3] with W[10], W[11]. */
Chris@42 86 TU = Ip[0];
Chris@42 87 TX = Im[0];
Chris@42 88 TT = W[0];
Chris@42 89 TW = W[1];
Chris@42 90 {
Chris@42 91 E TH, TK, TJ, T1f, TI, T1k, TV, TG;
Chris@42 92 TH = Ip[WS(rs, 4)];
Chris@42 93 TK = Im[WS(rs, 4)];
Chris@42 94 T1k = TT * TX;
Chris@42 95 TV = TT * TU;
Chris@42 96 TG = W[16];
Chris@42 97 TJ = W[17];
Chris@42 98 T1l = FNMS(TW, TU, T1k);
Chris@42 99 TY = FMA(TW, TX, TV);
Chris@42 100 T1f = TG * TK;
Chris@42 101 TI = TG * TH;
Chris@42 102 TO = Rp[WS(rs, 3)];
Chris@42 103 TR = Rm[WS(rs, 3)];
Chris@42 104 T1g = FNMS(TJ, TH, T1f);
Chris@42 105 TL = FMA(TJ, TK, TI);
Chris@42 106 TN = W[10];
Chris@42 107 TQ = W[11];
Chris@42 108 }
Chris@42 109 }
Chris@42 110 T1h = T1e + T1g;
Chris@42 111 T1J = T1g - T1e;
Chris@42 112 TM = TF - TL;
Chris@42 113 T16 = TF + TL;
Chris@42 114 T1i = TN * TR;
Chris@42 115 TP = TN * TO;
Chris@42 116 T1j = FNMS(TQ, TO, T1i);
Chris@42 117 TS = FMA(TQ, TR, TP);
Chris@42 118 }
Chris@42 119 {
Chris@42 120 E T1p, Te, T1w, Tx, Tn, Tq, Tp, T1r, Tk, T1t, To;
Chris@42 121 {
Chris@42 122 E Tt, Tw, Tv, T1v, Tu;
Chris@42 123 {
Chris@42 124 E Ta, Td, T9, Tc, T1o, Tb, Ts;
/* Rp[1]/Rm[1] with W[2], W[3]; Ip[1]/Im[1] with W[4], W[5]. */
Chris@42 125 Ta = Rp[WS(rs, 1)];
Chris@42 126 Td = Rm[WS(rs, 1)];
Chris@42 127 {
Chris@42 128 E T1I, T1m, TZ, T17;
Chris@42 129 T1I = T1l - T1j;
Chris@42 130 T1m = T1j + T1l;
Chris@42 131 TZ = TS - TY;
Chris@42 132 T17 = TS + TY;
Chris@42 133 T1n = T1h - T1m;
Chris@42 134 T1P = T1h + T1m;
Chris@42 135 T24 = T1J + T1I;
Chris@42 136 T1K = T1I - T1J;
Chris@42 137 T1Y = T16 - T17;
Chris@42 138 T18 = T16 + T17;
Chris@42 139 T10 = TM + TZ;
Chris@42 140 T2b = TZ - TM;
Chris@42 141 T9 = W[2];
Chris@42 142 }
Chris@42 143 Tc = W[3];
Chris@42 144 Tt = Ip[WS(rs, 1)];
Chris@42 145 Tw = Im[WS(rs, 1)];
Chris@42 146 T1o = T9 * Td;
Chris@42 147 Tb = T9 * Ta;
Chris@42 148 Ts = W[4];
Chris@42 149 Tv = W[5];
Chris@42 150 T1p = FNMS(Tc, Ta, T1o);
Chris@42 151 Te = FMA(Tc, Td, Tb);
Chris@42 152 T1v = Ts * Tw;
Chris@42 153 Tu = Ts * Tt;
Chris@42 154 }
Chris@42 155 {
Chris@42 156 E Tg, Tj, Tf, Ti, T1q, Th, Tm;
/* Ip[3]/Im[3] with W[12], W[13]; Rp[4]/Rm[4] with W[14], W[15]. These are the last data loads. */
Chris@42 157 Tg = Ip[WS(rs, 3)];
Chris@42 158 Tj = Im[WS(rs, 3)];
Chris@42 159 T1w = FNMS(Tv, Tt, T1v);
Chris@42 160 Tx = FMA(Tv, Tw, Tu);
Chris@42 161 Tf = W[12];
Chris@42 162 Ti = W[13];
Chris@42 163 Tn = Rp[WS(rs, 4)];
Chris@42 164 Tq = Rm[WS(rs, 4)];
Chris@42 165 T1q = Tf * Tj;
Chris@42 166 Th = Tf * Tg;
Chris@42 167 Tm = W[14];
Chris@42 168 Tp = W[15];
Chris@42 169 T1r = FNMS(Ti, Tg, T1q);
Chris@42 170 Tk = FMA(Ti, Tj, Th);
Chris@42 171 T1t = Tm * Tq;
Chris@42 172 To = Tm * Tn;
Chris@42 173 }
Chris@42 174 }
Chris@42 175 {
Chris@42 176 E T1s, T1G, Tl, T13, T1u, Tr;
Chris@42 177 T1s = T1p + T1r;
Chris@42 178 T1G = T1r - T1p;
Chris@42 179 Tl = Te - Tk;
Chris@42 180 T13 = Te + Tk;
Chris@42 181 T1u = FNMS(Tp, Tn, T1t);
Chris@42 182 Tr = FMA(Tp, Tq, To);
Chris@42 183 {
Chris@42 184 E T1x, T1F, T14, Ty;
Chris@42 185 T1x = T1u + T1w;
Chris@42 186 T1F = T1w - T1u;
Chris@42 187 T14 = Tr + Tx;
Chris@42 188 Ty = Tr - Tx;
Chris@42 189 T1H = T1F - T1G;
Chris@42 190 T23 = T1G + T1F;
Chris@42 191 T15 = T13 + T14;
Chris@42 192 T1Z = T13 - T14;
Chris@42 193 T2a = Ty - Tl;
Chris@42 194 Tz = Tl + Ty;
Chris@42 195 T1O = T1s + T1x;
Chris@42 196 T1y = T1s - T1x;
Chris@42 197 }
Chris@42 198 }
Chris@42 199 }
Chris@42 200 }
Chris@42 201 }
/* Output stage: every input has been loaded; from here on values are only combined and stored. */
Chris@42 202 {
Chris@42 203 E T2c, T2e, T29, T2d;
Chris@42 204 {
Chris@42 205 E T1D, T11, T25, T28, T27;
Chris@42 206 T1D = Tz - T10;
Chris@42 207 T11 = Tz + T10;
Chris@42 208 T25 = T23 + T24;
Chris@42 209 T28 = T24 - T23;
Chris@42 210 {
Chris@42 211 E T1N, T1L, T1C, T1M, T1E;
/* Stores Rm[4], Rp[1], Rm[0], Rp[3], Rm[2]. */
Chris@42 212 T1N = FNMS(KP618033988, T1H, T1K);
Chris@42 213 T1L = FMA(KP618033988, T1K, T1H);
Chris@42 214 Rm[WS(rs, 4)] = T8 + T11;
Chris@42 215 T1C = FNMS(KP250000000, T11, T8);
Chris@42 216 T1M = FNMS(KP559016994, T1D, T1C);
Chris@42 217 T1E = FMA(KP559016994, T1D, T1C);
Chris@42 218 T27 = FMA(KP250000000, T25, T26);
Chris@42 219 T2c = FMA(KP618033988, T2b, T2a);
Chris@42 220 T2e = FNMS(KP618033988, T2a, T2b);
Chris@42 221 Rp[WS(rs, 1)] = FMA(KP951056516, T1L, T1E);
Chris@42 222 Rm[0] = FNMS(KP951056516, T1L, T1E);
Chris@42 223 Rp[WS(rs, 3)] = FMA(KP951056516, T1N, T1M);
Chris@42 224 Rm[WS(rs, 2)] = FNMS(KP951056516, T1N, T1M);
Chris@42 225 }
Chris@42 226 Im[WS(rs, 4)] = T25 - T26;
Chris@42 227 T29 = FMA(KP559016994, T28, T27);
Chris@42 228 T2d = FNMS(KP559016994, T28, T27);
Chris@42 229 }
Chris@42 230 {
Chris@42 231 E T1c, T1A, T1z, T1B, T19, T1b, T1a, T1Q, T1W, T1V;
/* Stores Ip[3], Im[2], Ip[1], Im[0], then Rp[0], Rm[3], Rp[4], Rm[1], Rp[2] and Ip[0]. */
Chris@42 232 T19 = T15 + T18;
Chris@42 233 T1b = T15 - T18;
Chris@42 234 Ip[WS(rs, 3)] = FMA(KP951056516, T2e, T2d);
Chris@42 235 Im[WS(rs, 2)] = FMS(KP951056516, T2e, T2d);
Chris@42 236 Ip[WS(rs, 1)] = FMA(KP951056516, T2c, T29);
Chris@42 237 Im[0] = FMS(KP951056516, T2c, T29);
Chris@42 238 T1a = FNMS(KP250000000, T19, T12);
Chris@42 239 Rp[0] = T12 + T19;
Chris@42 240 T1c = FNMS(KP559016994, T1b, T1a);
Chris@42 241 T1A = FMA(KP559016994, T1b, T1a);
Chris@42 242 T1z = FNMS(KP618033988, T1y, T1n);
Chris@42 243 T1B = FMA(KP618033988, T1n, T1y);
Chris@42 244 T1Q = T1O + T1P;
Chris@42 245 T1W = T1O - T1P;
Chris@42 246 Rm[WS(rs, 3)] = FMA(KP951056516, T1B, T1A);
Chris@42 247 Rp[WS(rs, 4)] = FNMS(KP951056516, T1B, T1A);
Chris@42 248 Rm[WS(rs, 1)] = FMA(KP951056516, T1z, T1c);
Chris@42 249 Rp[WS(rs, 2)] = FNMS(KP951056516, T1z, T1c);
Chris@42 250 T1V = FNMS(KP250000000, T1Q, T1U);
Chris@42 251 Ip[0] = T1Q + T1U;
Chris@42 252 T1X = FNMS(KP559016994, T1W, T1V);
Chris@42 253 T21 = FMA(KP559016994, T1W, T1V);
Chris@42 254 T20 = FNMS(KP618033988, T1Z, T1Y);
Chris@42 255 T22 = FMA(KP618033988, T1Y, T1Z);
Chris@42 256 }
Chris@42 257 }
Chris@42 258 }
/* Final four stores, built from the values kept in the loop-level temporaries. */
Chris@42 259 Ip[WS(rs, 4)] = FMA(KP951056516, T22, T21);
Chris@42 260 Im[WS(rs, 3)] = FMS(KP951056516, T22, T21);
Chris@42 261 Ip[WS(rs, 2)] = FMA(KP951056516, T20, T1X);
Chris@42 262 Im[WS(rs, 1)] = FMS(KP951056516, T20, T1X);
Chris@42 263 }
Chris@42 264 }
Chris@42 265 }
Chris@42 266
/*
 * Twiddle-instruction table referenced by the codelet descriptor: a TW_FULL
 * entry with arguments (1, 10) followed by a TW_NEXT entry with (1, 0).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in project headers
 * not visible here; presumably the TW_NEXT entry ends the list -- confirm.
 */
Chris@42 267 static const tw_instr twinstr[] = {
Chris@42 268 {TW_FULL, 1, 10},
Chris@42 269 {TW_NEXT, 1, 0}
Chris@42 270 };
Chris@42 271
/*
 * Codelet descriptor: 10, the name string, the twiddle table, &GENUS and four
 * counts. The first three counts (48, 18, 54) match the "48 additions,
 * 18 multiplications, 54 fused multiply/add" figures in this branch's
 * generated header comment.
 * NOTE(review): field meanings are defined by hc2c_desc, declared elsewhere -- confirm.
 */
Chris@42 272 static const hc2c_desc desc = { 10, "hc2cf_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@42 273
/*
 * Registration entry point: hands the planner p, the hc2cf_10 kernel and its
 * descriptor to X(khc2c_register) together with the HC2C_VIA_RDFT flag.
 * NOTE(review): X() is a project macro not visible here (presumably a
 * symbol-name prefixing macro) -- confirm.
 */
Chris@42 274 void X(codelet_hc2cf_10) (planner *p) {
Chris@42 275 X(khc2c_register) (p, hc2cf_10, &desc, HC2C_VIA_RDFT);
Chris@42 276 }
Chris@42 277 #else /* HAVE_FMA */
Chris@42 278
Chris@42 279 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cf_10 -include hc2cf.h */
Chris@42 280
Chris@42 281 /*
Chris@42 282 * This function contains 102 FP additions, 60 FP multiplications,
Chris@42 283 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@42 284 * 45 stack variables, 4 constants, and 40 memory accesses
Chris@42 285 */
Chris@42 286 #include "hc2cf.h"
Chris@42 287
/*
 * hc2cf_10, non-FMA build: genfft-generated kernel (gen_hc2c, -n 10 -dit,
 * per the "Generated by" line above).
 *
 * Rp, Ip, Rm, Im  data arrays, read and then overwritten in place; every
 *                 iteration touches elements 0 and WS(rs, 1..4) of each.
 * W               twiddle table; W[0]..W[17] are read per iteration, after the
 *                 pointer is first offset by (mb - 1) * 18.
 * rs              stride handed to WS() when indexing the four arrays.
 * mb, me          half-open range [mb, me) of the loop index m.
 * ms              per-iteration step: Rp and Ip advance by +ms, Rm and Im by -ms.
 *
 * The body is organised as three load/twiddle groups followed by four store
 * groups of five outputs each; all loads precede the first store.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, MAKE_VOLATILE_STRIDE and the
 * FMA/FNMS macros come from project headers not visible here; presumably
 * FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm.
 */
Chris@42 288 static void hc2cf_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 289 {
/* Numerically: sin(pi/5), sin(2*pi/5), 1/4 and sqrt(5)/4. */
Chris@42 290 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 291 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 292 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 293 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 294 {
Chris@42 295 INT m;
/* One pass per m; the pointer and twiddle updates all live in the for-header. */
Chris@42 296 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
Chris@42 297 E T7, T1O, TT, T1C, TF, TQ, TR, T1r, T1s, T1L, TX, TY, TZ, T16, T19;
Chris@42 298 E T1y, Ti, Tt, Tu, T1o, T1p, T1M, TU, TV, TW, T1d, T1g, T1x;
/* Group 1: Rp[0]/Rm[0] used without a twiddle, combined with Ip[2]/Im[2] twiddled by W[8], W[9]. */
Chris@42 299 {
Chris@42 300 E T1, T1B, T6, T1A;
Chris@42 301 T1 = Rp[0];
Chris@42 302 T1B = Rm[0];
Chris@42 303 {
Chris@42 304 E T3, T5, T2, T4;
Chris@42 305 T3 = Ip[WS(rs, 2)];
Chris@42 306 T5 = Im[WS(rs, 2)];
Chris@42 307 T2 = W[8];
Chris@42 308 T4 = W[9];
Chris@42 309 T6 = FMA(T2, T3, T4 * T5);
Chris@42 310 T1A = FNMS(T4, T3, T2 * T5);
Chris@42 311 }
Chris@42 312 T7 = T1 - T6;
Chris@42 313 T1O = T1B - T1A;
Chris@42 314 TT = T1 + T6;
Chris@42 315 T1C = T1A + T1B;
Chris@42 316 }
/* Group 2: Rp[2]/Rm[2] (W[6..7]), Ip[0]/Im[0] (W[0..1]), Ip[4]/Im[4] (W[16..17]), Rp[3]/Rm[3] (W[10..11]), then their sums and differences. */
Chris@42 317 {
Chris@42 318 E Tz, T14, TP, T18, TE, T15, TK, T17;
Chris@42 319 {
Chris@42 320 E Tw, Ty, Tv, Tx;
Chris@42 321 Tw = Rp[WS(rs, 2)];
Chris@42 322 Ty = Rm[WS(rs, 2)];
Chris@42 323 Tv = W[6];
Chris@42 324 Tx = W[7];
Chris@42 325 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@42 326 T14 = FNMS(Tx, Tw, Tv * Ty);
Chris@42 327 }
Chris@42 328 {
Chris@42 329 E TM, TO, TL, TN;
Chris@42 330 TM = Ip[0];
Chris@42 331 TO = Im[0];
Chris@42 332 TL = W[0];
Chris@42 333 TN = W[1];
Chris@42 334 TP = FMA(TL, TM, TN * TO);
Chris@42 335 T18 = FNMS(TN, TM, TL * TO);
Chris@42 336 }
Chris@42 337 {
Chris@42 338 E TB, TD, TA, TC;
Chris@42 339 TB = Ip[WS(rs, 4)];
Chris@42 340 TD = Im[WS(rs, 4)];
Chris@42 341 TA = W[16];
Chris@42 342 TC = W[17];
Chris@42 343 TE = FMA(TA, TB, TC * TD);
Chris@42 344 T15 = FNMS(TC, TB, TA * TD);
Chris@42 345 }
Chris@42 346 {
Chris@42 347 E TH, TJ, TG, TI;
Chris@42 348 TH = Rp[WS(rs, 3)];
Chris@42 349 TJ = Rm[WS(rs, 3)];
Chris@42 350 TG = W[10];
Chris@42 351 TI = W[11];
Chris@42 352 TK = FMA(TG, TH, TI * TJ);
Chris@42 353 T17 = FNMS(TI, TH, TG * TJ);
Chris@42 354 }
Chris@42 355 TF = Tz - TE;
Chris@42 356 TQ = TK - TP;
Chris@42 357 TR = TF + TQ;
Chris@42 358 T1r = T14 - T15;
Chris@42 359 T1s = T18 - T17;
Chris@42 360 T1L = T1s - T1r;
Chris@42 361 TX = Tz + TE;
Chris@42 362 TY = TK + TP;
Chris@42 363 TZ = TX + TY;
Chris@42 364 T16 = T14 + T15;
Chris@42 365 T19 = T17 + T18;
Chris@42 366 T1y = T16 + T19;
Chris@42 367 }
/* Group 3: Rp[1]/Rm[1] (W[2..3]), Ip[1]/Im[1] (W[4..5]), Ip[3]/Im[3] (W[12..13]), Rp[4]/Rm[4] (W[14..15]), then their sums and differences. */
Chris@42 368 {
Chris@42 369 E Tc, T1b, Ts, T1f, Th, T1c, Tn, T1e;
Chris@42 370 {
Chris@42 371 E T9, Tb, T8, Ta;
Chris@42 372 T9 = Rp[WS(rs, 1)];
Chris@42 373 Tb = Rm[WS(rs, 1)];
Chris@42 374 T8 = W[2];
Chris@42 375 Ta = W[3];
Chris@42 376 Tc = FMA(T8, T9, Ta * Tb);
Chris@42 377 T1b = FNMS(Ta, T9, T8 * Tb);
Chris@42 378 }
Chris@42 379 {
Chris@42 380 E Tp, Tr, To, Tq;
Chris@42 381 Tp = Ip[WS(rs, 1)];
Chris@42 382 Tr = Im[WS(rs, 1)];
Chris@42 383 To = W[4];
Chris@42 384 Tq = W[5];
Chris@42 385 Ts = FMA(To, Tp, Tq * Tr);
Chris@42 386 T1f = FNMS(Tq, Tp, To * Tr);
Chris@42 387 }
Chris@42 388 {
Chris@42 389 E Te, Tg, Td, Tf;
Chris@42 390 Te = Ip[WS(rs, 3)];
Chris@42 391 Tg = Im[WS(rs, 3)];
Chris@42 392 Td = W[12];
Chris@42 393 Tf = W[13];
Chris@42 394 Th = FMA(Td, Te, Tf * Tg);
Chris@42 395 T1c = FNMS(Tf, Te, Td * Tg);
Chris@42 396 }
Chris@42 397 {
Chris@42 398 E Tk, Tm, Tj, Tl;
Chris@42 399 Tk = Rp[WS(rs, 4)];
Chris@42 400 Tm = Rm[WS(rs, 4)];
Chris@42 401 Tj = W[14];
Chris@42 402 Tl = W[15];
Chris@42 403 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@42 404 T1e = FNMS(Tl, Tk, Tj * Tm);
Chris@42 405 }
Chris@42 406 Ti = Tc - Th;
Chris@42 407 Tt = Tn - Ts;
Chris@42 408 Tu = Ti + Tt;
Chris@42 409 T1o = T1b - T1c;
Chris@42 410 T1p = T1e - T1f;
Chris@42 411 T1M = T1o + T1p;
Chris@42 412 TU = Tc + Th;
Chris@42 413 TV = Tn + Ts;
Chris@42 414 TW = TU + TV;
Chris@42 415 T1d = T1b + T1c;
Chris@42 416 T1g = T1e + T1f;
Chris@42 417 T1x = T1d + T1g;
Chris@42 418 }
/* Store group A: Rm[4], Rm[2], Rp[3], Rm[0], Rp[1]. */
Chris@42 419 {
Chris@42 420 E T1l, TS, T1m, T1u, T1w, T1q, T1t, T1v, T1n;
Chris@42 421 T1l = KP559016994 * (Tu - TR);
Chris@42 422 TS = Tu + TR;
Chris@42 423 T1m = FNMS(KP250000000, TS, T7);
Chris@42 424 T1q = T1o - T1p;
Chris@42 425 T1t = T1r + T1s;
Chris@42 426 T1u = FMA(KP951056516, T1q, KP587785252 * T1t);
Chris@42 427 T1w = FNMS(KP587785252, T1q, KP951056516 * T1t);
Chris@42 428 Rm[WS(rs, 4)] = T7 + TS;
Chris@42 429 T1v = T1m - T1l;
Chris@42 430 Rm[WS(rs, 2)] = T1v - T1w;
Chris@42 431 Rp[WS(rs, 3)] = T1v + T1w;
Chris@42 432 T1n = T1l + T1m;
Chris@42 433 Rm[0] = T1n - T1u;
Chris@42 434 Rp[WS(rs, 1)] = T1n + T1u;
Chris@42 435 }
/* Store group B: Im[4], Im[2], Ip[3], Im[0], Ip[1]. */
Chris@42 436 {
Chris@42 437 E T1S, T1N, T1T, T1R, T1V, T1P, T1Q, T1W, T1U;
Chris@42 438 T1S = KP559016994 * (T1M + T1L);
Chris@42 439 T1N = T1L - T1M;
Chris@42 440 T1T = FMA(KP250000000, T1N, T1O);
Chris@42 441 T1P = TQ - TF;
Chris@42 442 T1Q = Ti - Tt;
Chris@42 443 T1R = FNMS(KP951056516, T1Q, KP587785252 * T1P);
Chris@42 444 T1V = FMA(KP587785252, T1Q, KP951056516 * T1P);
Chris@42 445 Im[WS(rs, 4)] = T1N - T1O;
Chris@42 446 T1W = T1T - T1S;
Chris@42 447 Im[WS(rs, 2)] = T1V - T1W;
Chris@42 448 Ip[WS(rs, 3)] = T1V + T1W;
Chris@42 449 T1U = T1S + T1T;
Chris@42 450 Im[0] = T1R - T1U;
Chris@42 451 Ip[WS(rs, 1)] = T1R + T1U;
Chris@42 452 }
/* Store group C: Rp[0], Rp[4], Rm[3], Rp[2], Rm[1]. */
Chris@42 453 {
Chris@42 454 E T12, T10, T11, T1i, T1k, T1a, T1h, T1j, T13;
Chris@42 455 T12 = KP559016994 * (TW - TZ);
Chris@42 456 T10 = TW + TZ;
Chris@42 457 T11 = FNMS(KP250000000, T10, TT);
Chris@42 458 T1a = T16 - T19;
Chris@42 459 T1h = T1d - T1g;
Chris@42 460 T1i = FNMS(KP587785252, T1h, KP951056516 * T1a);
Chris@42 461 T1k = FMA(KP951056516, T1h, KP587785252 * T1a);
Chris@42 462 Rp[0] = TT + T10;
Chris@42 463 T1j = T12 + T11;
Chris@42 464 Rp[WS(rs, 4)] = T1j - T1k;
Chris@42 465 Rm[WS(rs, 3)] = T1j + T1k;
Chris@42 466 T13 = T11 - T12;
Chris@42 467 Rp[WS(rs, 2)] = T13 - T1i;
Chris@42 468 Rm[WS(rs, 1)] = T13 + T1i;
Chris@42 469 }
/* Store group D: Ip[0], Im[3], Ip[4], Im[1], Ip[2]. */
Chris@42 470 {
Chris@42 471 E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
Chris@42 472 T1H = KP559016994 * (T1x - T1y);
Chris@42 473 T1z = T1x + T1y;
Chris@42 474 T1G = FNMS(KP250000000, T1z, T1C);
Chris@42 475 T1D = TX - TY;
Chris@42 476 T1E = TU - TV;
Chris@42 477 T1F = FNMS(KP587785252, T1E, KP951056516 * T1D);
Chris@42 478 T1J = FMA(KP951056516, T1E, KP587785252 * T1D);
Chris@42 479 Ip[0] = T1z + T1C;
Chris@42 480 T1K = T1H + T1G;
Chris@42 481 Im[WS(rs, 3)] = T1J - T1K;
Chris@42 482 Ip[WS(rs, 4)] = T1J + T1K;
Chris@42 483 T1I = T1G - T1H;
Chris@42 484 Im[WS(rs, 1)] = T1F - T1I;
Chris@42 485 Ip[WS(rs, 2)] = T1F + T1I;
Chris@42 486 }
Chris@42 487 }
Chris@42 488 }
Chris@42 489 }
Chris@42 490
/*
 * Twiddle-instruction table referenced by the codelet descriptor: a TW_FULL
 * entry with arguments (1, 10) followed by a TW_NEXT entry with (1, 0).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in project headers
 * not visible here; presumably the TW_NEXT entry ends the list -- confirm.
 */
Chris@42 491 static const tw_instr twinstr[] = {
Chris@42 492 {TW_FULL, 1, 10},
Chris@42 493 {TW_NEXT, 1, 0}
Chris@42 494 };
Chris@42 495
/*
 * Codelet descriptor: 10, the name string, the twiddle table, &GENUS and four
 * counts. The first three counts (72, 30, 30) match the "72 additions,
 * 30 multiplications, 30 fused multiply/add" figures in this branch's
 * generated header comment.
 * NOTE(review): field meanings are defined by hc2c_desc, declared elsewhere -- confirm.
 */
Chris@42 496 static const hc2c_desc desc = { 10, "hc2cf_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@42 497
/*
 * Registration entry point: hands the planner p, the hc2cf_10 kernel and its
 * descriptor to X(khc2c_register) together with the HC2C_VIA_RDFT flag.
 * NOTE(review): X() is a project macro not visible here (presumably a
 * symbol-name prefixing macro) -- confirm.
 */
Chris@42 498 void X(codelet_hc2cf_10) (planner *p) {
Chris@42 499 X(khc2c_register) (p, hc2cf_10, &desc, HC2C_VIA_RDFT);
Chris@42 500 }
Chris@42 501 #endif /* HAVE_FMA */