annotate src/fftw-3.3.5/rdft/scalar/r2cf/hf_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:20 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hf_10 -include hf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@42 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@42 33 * 72 stack variables, 4 constants, and 40 memory accesses
Chris@42 34 */
Chris@42 35 #include "hf.h"
Chris@42 36
/*
 * hf_10, HAVE_FMA variant (generated by gen_hc2hc with "-n 10 -dit", see the
 * generator command line earlier in this file).
 *
 * For every m in [mb, me) one pass of the loop body
 *   - loads cr[0], ci[0] and cr[WS(rs, k)], ci[WS(rs, k)] for k = 1..9,
 *   - loads the 18 factors W[0..17],
 *   - stores new values into the same twenty cr/ci locations.
 * All twenty loads are issued before the first store, so the update is done
 * in place.
 *
 * Parameters:
 *   cr, ci  in/out arrays; cr is advanced by +ms and ci by -ms per pass.
 *   W       factor table, 18 entries per pass; it is first offset by
 *           (mb - 1) * 18 and then advanced by 18 per pass.
 *   rs      stride handed to WS() when indexing cr and ci.
 *   mb, me  half-open range of the loop index m.
 *   ms      per-pass pointer step for cr and ci.
 *
 * NOTE(review): E, R, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from headers that are not part of this file. The comments below assume
 * FMA(a, b, c) = a*b + c, FNMS(a, b, c) = c - a*b and FMS(a, b, c) = a*b - c
 * -- confirm against the header definitions.
 */
Chris@42 37 static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/*
 * Numeric constants: 0.951056516... ~= sin(2*pi/5), 0.559016994... ~= sqrt(5)/4,
 * 0.618033988... ~= (sqrt(5) - 1)/2.
 */
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 43 {
Chris@42 44 INT m;
/* One pass per m; all pointer updates live in the for-increment expression. */
Chris@42 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
/* These four are produced in the inner scopes and consumed by the last four stores. */
Chris@42 46 E T29, T2d, T2c, T2e;
Chris@42 47 {
Chris@42 48 E T23, T1U, T8, T12, T1y, T1P, T25, T1H, T2b, T18, T10, T1Y, T1I, Tl, T13;
Chris@42 49 E T1J, Ty, T14, T1n, T1O, T24, T1K;
Chris@42 50 {
Chris@42 51 E T1, T1R, T3, T6, T2, T5;
/* Element 0 is used unscaled; element 5 is paired with W[8], W[9]. */
Chris@42 52 T1 = cr[0];
Chris@42 53 T1R = ci[0];
Chris@42 54 T3 = cr[WS(rs, 5)];
Chris@42 55 T6 = ci[WS(rs, 5)];
Chris@42 56 T2 = W[8];
Chris@42 57 T5 = W[9];
Chris@42 58 {
Chris@42 59 E T1p, TY, T1x, T1F, TM, T16, T1r, TS;
Chris@42 60 {
Chris@42 61 E TF, T1w, TO, TR, T1u, TL, TN, TQ, T1q, TP;
Chris@42 62 {
Chris@42 63 E TU, TX, TT, TW;
Chris@42 64 {
Chris@42 65 E TB, TE, T1S, T4, TA, TD;
/* Element 4 is paired with W[6], W[7]. */
Chris@42 66 TB = cr[WS(rs, 4)];
Chris@42 67 TE = ci[WS(rs, 4)];
Chris@42 68 T1S = T2 * T6;
Chris@42 69 T4 = T2 * T3;
Chris@42 70 TA = W[6];
Chris@42 71 TD = W[7];
Chris@42 72 {
Chris@42 73 E T1T, T7, T1v, TC;
/* With the assumed macro semantics: T7 = W[8]*cr5 + W[9]*ci5, T1T = W[8]*ci5 - W[9]*cr5. */
Chris@42 74 T1T = FNMS(T5, T3, T1S);
Chris@42 75 T7 = FMA(T5, T6, T4);
Chris@42 76 T1v = TA * TE;
Chris@42 77 TC = TA * TB;
/* Sum and difference of element 0 with the scaled element 5. */
Chris@42 78 T23 = T1T + T1R;
Chris@42 79 T1U = T1R - T1T;
Chris@42 80 T8 = T1 - T7;
Chris@42 81 T12 = T1 + T7;
Chris@42 82 TF = FMA(TD, TE, TC);
Chris@42 83 T1w = FNMS(TD, TB, T1v);
Chris@42 84 }
Chris@42 85 }
/* Element 1 is paired with W[0], W[1]; element 9 with W[16], W[17]. */
Chris@42 86 TU = cr[WS(rs, 1)];
Chris@42 87 TX = ci[WS(rs, 1)];
Chris@42 88 TT = W[0];
Chris@42 89 TW = W[1];
Chris@42 90 {
Chris@42 91 E TH, TK, TJ, T1t, TI, T1o, TV, TG;
Chris@42 92 TH = cr[WS(rs, 9)];
Chris@42 93 TK = ci[WS(rs, 9)];
Chris@42 94 T1o = TT * TX;
Chris@42 95 TV = TT * TU;
Chris@42 96 TG = W[16];
Chris@42 97 TJ = W[17];
Chris@42 98 T1p = FNMS(TW, TU, T1o);
Chris@42 99 TY = FMA(TW, TX, TV);
Chris@42 100 T1t = TG * TK;
Chris@42 101 TI = TG * TH;
/* Element 6 is paired with W[10], W[11]. */
Chris@42 102 TO = cr[WS(rs, 6)];
Chris@42 103 TR = ci[WS(rs, 6)];
Chris@42 104 T1u = FNMS(TJ, TH, T1t);
Chris@42 105 TL = FMA(TJ, TK, TI);
Chris@42 106 TN = W[10];
Chris@42 107 TQ = W[11];
Chris@42 108 }
Chris@42 109 }
/* Sums and differences of the scaled elements 4 and 9. */
Chris@42 110 T1x = T1u - T1w;
Chris@42 111 T1F = T1w + T1u;
Chris@42 112 TM = TF - TL;
Chris@42 113 T16 = TF + TL;
Chris@42 114 T1q = TN * TR;
Chris@42 115 TP = TN * TO;
Chris@42 116 T1r = FNMS(TQ, TO, T1q);
Chris@42 117 TS = FMA(TQ, TR, TP);
Chris@42 118 }
Chris@42 119 {
Chris@42 120 E T1l, Te, T1e, Tx, Tn, Tq, Tp, T1j, Tk, T1f, To;
Chris@42 121 {
Chris@42 122 E Tt, Tw, Tv, T1d, Tu;
Chris@42 123 {
Chris@42 124 E Ta, Td, T9, Tc, T1k, Tb, Ts;
/* Element 2 is paired with W[2], W[3]. */
Chris@42 125 Ta = cr[WS(rs, 2)];
Chris@42 126 Td = ci[WS(rs, 2)];
Chris@42 127 {
Chris@42 128 E T1G, T1s, TZ, T17;
/* Sums and differences of the scaled elements 1 and 6, then combined with the 4/9 results. */
Chris@42 129 T1G = T1r + T1p;
Chris@42 130 T1s = T1p - T1r;
Chris@42 131 TZ = TS - TY;
Chris@42 132 T17 = TS + TY;
Chris@42 133 T1y = T1s - T1x;
Chris@42 134 T1P = T1x + T1s;
Chris@42 135 T25 = T1F + T1G;
Chris@42 136 T1H = T1F - T1G;
Chris@42 137 T2b = T16 - T17;
Chris@42 138 T18 = T16 + T17;
Chris@42 139 T10 = TM + TZ;
Chris@42 140 T1Y = TZ - TM;
Chris@42 141 T9 = W[2];
Chris@42 142 }
Chris@42 143 Tc = W[3];
/* Element 3 is paired with W[4], W[5]. */
Chris@42 144 Tt = cr[WS(rs, 3)];
Chris@42 145 Tw = ci[WS(rs, 3)];
Chris@42 146 T1k = T9 * Td;
Chris@42 147 Tb = T9 * Ta;
Chris@42 148 Ts = W[4];
Chris@42 149 Tv = W[5];
Chris@42 150 T1l = FNMS(Tc, Ta, T1k);
Chris@42 151 Te = FMA(Tc, Td, Tb);
Chris@42 152 T1d = Ts * Tw;
Chris@42 153 Tu = Ts * Tt;
Chris@42 154 }
Chris@42 155 {
Chris@42 156 E Tg, Tj, Tf, Ti, T1i, Th, Tm;
/* Element 7 is paired with W[12], W[13]; element 8 with W[14], W[15]. */
Chris@42 157 Tg = cr[WS(rs, 7)];
Chris@42 158 Tj = ci[WS(rs, 7)];
Chris@42 159 T1e = FNMS(Tv, Tt, T1d);
Chris@42 160 Tx = FMA(Tv, Tw, Tu);
Chris@42 161 Tf = W[12];
Chris@42 162 Ti = W[13];
/* Last loads of this pass; every cr/ci access after this point is a store. */
Chris@42 163 Tn = cr[WS(rs, 8)];
Chris@42 164 Tq = ci[WS(rs, 8)];
Chris@42 165 T1i = Tf * Tj;
Chris@42 166 Th = Tf * Tg;
Chris@42 167 Tm = W[14];
Chris@42 168 Tp = W[15];
Chris@42 169 T1j = FNMS(Ti, Tg, T1i);
Chris@42 170 Tk = FMA(Ti, Tj, Th);
Chris@42 171 T1f = Tm * Tq;
Chris@42 172 To = Tm * Tn;
Chris@42 173 }
Chris@42 174 }
Chris@42 175 {
Chris@42 176 E T1m, T1g, Tr, T1h;
/* Sums and differences of the scaled elements 2/7 and 8/3. */
Chris@42 177 T1m = T1j - T1l;
Chris@42 178 T1I = T1l + T1j;
Chris@42 179 Tl = Te - Tk;
Chris@42 180 T13 = Te + Tk;
Chris@42 181 T1g = FNMS(Tp, Tn, T1f);
Chris@42 182 Tr = FMA(Tp, Tq, To);
Chris@42 183 T1J = T1g + T1e;
Chris@42 184 T1h = T1e - T1g;
Chris@42 185 Ty = Tr - Tx;
Chris@42 186 T14 = Tr + Tx;
Chris@42 187 T1n = T1h - T1m;
Chris@42 188 T1O = T1m + T1h;
Chris@42 189 }
Chris@42 190 }
Chris@42 191 }
Chris@42 192 }
Chris@42 193 T24 = T1I + T1J;
Chris@42 194 T1K = T1I - T1J;
Chris@42 195 {
Chris@42 196 E T2a, T15, Tz, T1Z;
Chris@42 197 T2a = T13 - T14;
Chris@42 198 T15 = T13 + T14;
Chris@42 199 Tz = Tl + Ty;
Chris@42 200 T1Z = Ty - Tl;
Chris@42 201 {
Chris@42 202 E T1L, T1N, T1E, T1M;
Chris@42 203 {
Chris@42 204 E T19, T1D, T1C, T11, T1b;
Chris@42 205 T19 = T15 + T18;
Chris@42 206 T1D = T15 - T18;
Chris@42 207 T11 = Tz + T10;
Chris@42 208 T1b = Tz - T10;
Chris@42 209 {
Chris@42 210 E T1B, T1z, T1a, T1A, T1c;
Chris@42 211 T1B = FNMS(KP618033988, T1n, T1y);
Chris@42 212 T1z = FMA(KP618033988, T1y, T1n);
/* Stores begin here: ci[4], then cr[1], ci[0], cr[3], ci[2]. */
Chris@42 213 ci[WS(rs, 4)] = T8 + T11;
Chris@42 214 T1a = FNMS(KP250000000, T11, T8);
Chris@42 215 T1A = FNMS(KP559016994, T1b, T1a);
Chris@42 216 T1c = FMA(KP559016994, T1b, T1a);
Chris@42 217 T1C = FNMS(KP250000000, T19, T12);
Chris@42 218 T1L = FNMS(KP618033988, T1K, T1H);
Chris@42 219 T1N = FMA(KP618033988, T1H, T1K);
Chris@42 220 cr[WS(rs, 1)] = FMA(KP951056516, T1z, T1c);
Chris@42 221 ci[0] = FNMS(KP951056516, T1z, T1c);
Chris@42 222 cr[WS(rs, 3)] = FMA(KP951056516, T1B, T1A);
Chris@42 223 ci[WS(rs, 2)] = FNMS(KP951056516, T1B, T1A);
Chris@42 224 }
Chris@42 225 cr[0] = T12 + T19;
Chris@42 226 T1E = FNMS(KP559016994, T1D, T1C);
Chris@42 227 T1M = FMA(KP559016994, T1D, T1C);
Chris@42 228 }
Chris@42 229 {
Chris@42 230 E T1X, T21, T20, T22, T1Q, T1W, T1V, T26, T28, T27;
Chris@42 231 T1Q = T1O + T1P;
Chris@42 232 T1W = T1P - T1O;
/* Stores ci[3], cr[4], ci[1], cr[2]. */
Chris@42 233 ci[WS(rs, 3)] = FMA(KP951056516, T1N, T1M);
Chris@42 234 cr[WS(rs, 4)] = FNMS(KP951056516, T1N, T1M);
Chris@42 235 ci[WS(rs, 1)] = FMA(KP951056516, T1L, T1E);
Chris@42 236 cr[WS(rs, 2)] = FNMS(KP951056516, T1L, T1E);
Chris@42 237 T1V = FMA(KP250000000, T1Q, T1U);
Chris@42 238 cr[WS(rs, 5)] = T1Q - T1U;
Chris@42 239 T1X = FNMS(KP559016994, T1W, T1V);
Chris@42 240 T21 = FMA(KP559016994, T1W, T1V);
Chris@42 241 T20 = FNMS(KP618033988, T1Z, T1Y);
Chris@42 242 T22 = FMA(KP618033988, T1Y, T1Z);
Chris@42 243 T26 = T24 + T25;
Chris@42 244 T28 = T24 - T25;
/* Stores ci[8], cr[9], ci[6], cr[7]; the cr stores use FMS rather than FNMS. */
Chris@42 245 ci[WS(rs, 8)] = FMA(KP951056516, T22, T21);
Chris@42 246 cr[WS(rs, 9)] = FMS(KP951056516, T22, T21);
Chris@42 247 ci[WS(rs, 6)] = FMA(KP951056516, T20, T1X);
Chris@42 248 cr[WS(rs, 7)] = FMS(KP951056516, T20, T1X);
Chris@42 249 T27 = FNMS(KP250000000, T26, T23);
Chris@42 250 ci[WS(rs, 9)] = T26 + T23;
/* Values handed to the final four stores outside the nested scopes. */
Chris@42 251 T29 = FMA(KP559016994, T28, T27);
Chris@42 252 T2d = FNMS(KP559016994, T28, T27);
Chris@42 253 T2c = FMA(KP618033988, T2b, T2a);
Chris@42 254 T2e = FNMS(KP618033988, T2a, T2b);
Chris@42 255 }
Chris@42 256 }
Chris@42 257 }
Chris@42 258 }
/* Final stores: ci[7], cr[8], ci[5], cr[6]. */
Chris@42 259 ci[WS(rs, 7)] = FMA(KP951056516, T2e, T2d);
Chris@42 260 cr[WS(rs, 8)] = FMS(KP951056516, T2e, T2d);
Chris@42 261 ci[WS(rs, 5)] = FMA(KP951056516, T2c, T29);
Chris@42 262 cr[WS(rs, 6)] = FMS(KP951056516, T2c, T29);
Chris@42 263 }
Chris@42 264 }
Chris@42 265 }
Chris@42 266
/*
 * Twiddle instruction table referenced by desc in this (HAVE_FMA) branch.
 * It holds one TW_FULL entry with operands 1 and 10, followed by a TW_NEXT
 * entry with operands 1 and 0.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in headers not
 * shown here; presumably TW_NEXT terminates the list -- confirm.
 */
Chris@42 267 static const tw_instr twinstr[] = {
Chris@42 268 {TW_FULL, 1, 10},
Chris@42 269 {TW_NEXT, 1, 0}
Chris@42 270 };
Chris@42 271
/*
 * Descriptor passed to X(khc2hc_register) together with hf_10 (HAVE_FMA branch):
 * the value 10, the name string "hf_10", the twinstr table, &GENUS, and the
 * tuple {48, 18, 54, 0}. The first three tuple entries repeat the operation
 * counts stated in this variant's header comment (48 additions,
 * 18 multiplications, 54 fused multiply/add); the meaning of the fourth entry
 * is not visible in this file.
 */
Chris@42 272 static const hc2hc_desc desc = { 10, "hf_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@42 273
/*
 * Registration entry point (HAVE_FMA branch): hands hf_10 and its descriptor
 * desc to X(khc2hc_register) for planner p.
 * NOTE(review): X() is a macro from headers not shown; presumably it applies
 * the library's symbol prefix -- confirm.
 */
Chris@42 274 void X(codelet_hf_10) (planner *p) {
Chris@42 275 X(khc2hc_register) (p, hf_10, &desc);
Chris@42 276 }
Chris@42 277 #else /* HAVE_FMA */
Chris@42 278
Chris@42 279 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hf_10 -include hf.h */
Chris@42 280
Chris@42 281 /*
Chris@42 282 * This function contains 102 FP additions, 60 FP multiplications,
Chris@42 283 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@42 284 * 45 stack variables, 4 constants, and 40 memory accesses
Chris@42 285 */
Chris@42 286 #include "hf.h"
Chris@42 287
/*
 * hf_10, variant compiled when HAVE_FMA is not defined (generated by
 * gen_hc2hc with "-n 10 -dit", see the generator command line just above).
 *
 * For every m in [mb, me) one pass of the loop body
 *   - loads cr[0], ci[0] and cr[WS(rs, k)], ci[WS(rs, k)] for k = 1..9,
 *   - loads the 18 factors W[0..17],
 *   - stores new values into the same twenty cr/ci locations.
 * All twenty loads are issued before the first store, so the update is done
 * in place.
 *
 * Parameters:
 *   cr, ci  in/out arrays; cr is advanced by +ms and ci by -ms per pass.
 *   W       factor table, 18 entries per pass; it is first offset by
 *           (mb - 1) * 18 and then advanced by 18 per pass.
 *   rs      stride handed to WS() when indexing cr and ci.
 *   mb, me  half-open range of the loop index m.
 *   ms      per-pass pointer step for cr and ci.
 *
 * NOTE(review): E, R, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * headers that are not part of this file. The comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm against the
 * header definitions.
 */
Chris@42 288 static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 289 {
/*
 * Numeric constants: 0.587785252... ~= sin(pi/5), 0.951056516... ~= sin(2*pi/5),
 * 0.559016994... ~= sqrt(5)/4.
 */
Chris@42 290 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 291 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 292 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 293 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 294 {
Chris@42 295 INT m;
/* One pass per m; all pointer updates live in the for-increment expression. */
Chris@42 296 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@42 297 E T7, T1R, TT, T1C, TF, TQ, TR, T1o, T1p, T1P, TX, TY, TZ, T1d, T1g;
Chris@42 298 E T1x, Ti, Tt, Tu, T1r, T1s, T1O, TU, TV, TW, T16, T19, T1y;
/* Stage 1: element 0 (unscaled) combined with element 5 scaled by W[8], W[9]. */
Chris@42 299 {
Chris@42 300 E T1, T1A, T6, T1B;
Chris@42 301 T1 = cr[0];
Chris@42 302 T1A = ci[0];
Chris@42 303 {
Chris@42 304 E T3, T5, T2, T4;
Chris@42 305 T3 = cr[WS(rs, 5)];
Chris@42 306 T5 = ci[WS(rs, 5)];
Chris@42 307 T2 = W[8];
Chris@42 308 T4 = W[9];
/* With the assumed macro semantics: T6 = T2*T3 + T4*T5, T1B = T2*T5 - T4*T3. */
Chris@42 309 T6 = FMA(T2, T3, T4 * T5);
Chris@42 310 T1B = FNMS(T4, T3, T2 * T5);
Chris@42 311 }
Chris@42 312 T7 = T1 - T6;
Chris@42 313 T1R = T1B + T1A;
Chris@42 314 TT = T1 + T6;
Chris@42 315 T1C = T1A - T1B;
Chris@42 316 }
/*
 * Stage 2: elements 4, 1, 9 and 6, each scaled by its own pair of W entries
 * (W[6..7], W[0..1], W[16..17], W[10..11]), then paired as 4 with 9 and 6 with 1.
 */
Chris@42 317 {
Chris@42 318 E Tz, T1b, TP, T1e, TE, T1c, TK, T1f;
Chris@42 319 {
Chris@42 320 E Tw, Ty, Tv, Tx;
Chris@42 321 Tw = cr[WS(rs, 4)];
Chris@42 322 Ty = ci[WS(rs, 4)];
Chris@42 323 Tv = W[6];
Chris@42 324 Tx = W[7];
Chris@42 325 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@42 326 T1b = FNMS(Tx, Tw, Tv * Ty);
Chris@42 327 }
Chris@42 328 {
Chris@42 329 E TM, TO, TL, TN;
Chris@42 330 TM = cr[WS(rs, 1)];
Chris@42 331 TO = ci[WS(rs, 1)];
Chris@42 332 TL = W[0];
Chris@42 333 TN = W[1];
Chris@42 334 TP = FMA(TL, TM, TN * TO);
Chris@42 335 T1e = FNMS(TN, TM, TL * TO);
Chris@42 336 }
Chris@42 337 {
Chris@42 338 E TB, TD, TA, TC;
Chris@42 339 TB = cr[WS(rs, 9)];
Chris@42 340 TD = ci[WS(rs, 9)];
Chris@42 341 TA = W[16];
Chris@42 342 TC = W[17];
Chris@42 343 TE = FMA(TA, TB, TC * TD);
Chris@42 344 T1c = FNMS(TC, TB, TA * TD);
Chris@42 345 }
Chris@42 346 {
Chris@42 347 E TH, TJ, TG, TI;
Chris@42 348 TH = cr[WS(rs, 6)];
Chris@42 349 TJ = ci[WS(rs, 6)];
Chris@42 350 TG = W[10];
Chris@42 351 TI = W[11];
Chris@42 352 TK = FMA(TG, TH, TI * TJ);
Chris@42 353 T1f = FNMS(TI, TH, TG * TJ);
Chris@42 354 }
Chris@42 355 TF = Tz - TE;
Chris@42 356 TQ = TK - TP;
Chris@42 357 TR = TF + TQ;
Chris@42 358 T1o = T1b + T1c;
Chris@42 359 T1p = T1f + T1e;
Chris@42 360 T1P = T1o + T1p;
Chris@42 361 TX = Tz + TE;
Chris@42 362 TY = TK + TP;
Chris@42 363 TZ = TX + TY;
Chris@42 364 T1d = T1b - T1c;
Chris@42 365 T1g = T1e - T1f;
Chris@42 366 T1x = T1g - T1d;
Chris@42 367 }
/*
 * Stage 3: elements 2, 3, 7 and 8, each scaled by its own pair of W entries
 * (W[2..3], W[4..5], W[12..13], W[14..15]), then paired as 2 with 7 and 8 with 3.
 */
Chris@42 368 {
Chris@42 369 E Tc, T14, Ts, T18, Th, T15, Tn, T17;
Chris@42 370 {
Chris@42 371 E T9, Tb, T8, Ta;
Chris@42 372 T9 = cr[WS(rs, 2)];
Chris@42 373 Tb = ci[WS(rs, 2)];
Chris@42 374 T8 = W[2];
Chris@42 375 Ta = W[3];
Chris@42 376 Tc = FMA(T8, T9, Ta * Tb);
Chris@42 377 T14 = FNMS(Ta, T9, T8 * Tb);
Chris@42 378 }
Chris@42 379 {
Chris@42 380 E Tp, Tr, To, Tq;
Chris@42 381 Tp = cr[WS(rs, 3)];
Chris@42 382 Tr = ci[WS(rs, 3)];
Chris@42 383 To = W[4];
Chris@42 384 Tq = W[5];
Chris@42 385 Ts = FMA(To, Tp, Tq * Tr);
Chris@42 386 T18 = FNMS(Tq, Tp, To * Tr);
Chris@42 387 }
Chris@42 388 {
Chris@42 389 E Te, Tg, Td, Tf;
Chris@42 390 Te = cr[WS(rs, 7)];
Chris@42 391 Tg = ci[WS(rs, 7)];
Chris@42 392 Td = W[12];
Chris@42 393 Tf = W[13];
Chris@42 394 Th = FMA(Td, Te, Tf * Tg);
Chris@42 395 T15 = FNMS(Tf, Te, Td * Tg);
Chris@42 396 }
Chris@42 397 {
Chris@42 398 E Tk, Tm, Tj, Tl;
/* Last loads of this pass; every cr/ci access after this block is a store. */
Chris@42 399 Tk = cr[WS(rs, 8)];
Chris@42 400 Tm = ci[WS(rs, 8)];
Chris@42 401 Tj = W[14];
Chris@42 402 Tl = W[15];
Chris@42 403 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@42 404 T17 = FNMS(Tl, Tk, Tj * Tm);
Chris@42 405 }
Chris@42 406 Ti = Tc - Th;
Chris@42 407 Tt = Tn - Ts;
Chris@42 408 Tu = Ti + Tt;
Chris@42 409 T1r = T14 + T15;
Chris@42 410 T1s = T17 + T18;
Chris@42 411 T1O = T1r + T1s;
Chris@42 412 TU = Tc + Th;
Chris@42 413 TV = Tn + Ts;
Chris@42 414 TW = TU + TV;
Chris@42 415 T16 = T14 - T15;
Chris@42 416 T19 = T17 - T18;
Chris@42 417 T1y = T16 + T19;
Chris@42 418 }
/* Output group A: stores ci[4], ci[2], cr[3], ci[0], cr[1]. */
Chris@42 419 {
Chris@42 420 E T11, TS, T12, T1i, T1k, T1a, T1h, T1j, T13;
Chris@42 421 T11 = KP559016994 * (Tu - TR);
Chris@42 422 TS = Tu + TR;
Chris@42 423 T12 = FNMS(KP250000000, TS, T7);
Chris@42 424 T1a = T16 - T19;
Chris@42 425 T1h = T1d + T1g;
Chris@42 426 T1i = FMA(KP951056516, T1a, KP587785252 * T1h);
Chris@42 427 T1k = FNMS(KP587785252, T1a, KP951056516 * T1h);
Chris@42 428 ci[WS(rs, 4)] = T7 + TS;
Chris@42 429 T1j = T12 - T11;
Chris@42 430 ci[WS(rs, 2)] = T1j - T1k;
Chris@42 431 cr[WS(rs, 3)] = T1j + T1k;
Chris@42 432 T13 = T11 + T12;
Chris@42 433 ci[0] = T13 - T1i;
Chris@42 434 cr[WS(rs, 1)] = T13 + T1i;
Chris@42 435 }
/* Output group B: stores cr[0], cr[4], ci[3], cr[2], ci[1]. */
Chris@42 436 {
Chris@42 437 E T1m, T10, T1l, T1u, T1w, T1q, T1t, T1v, T1n;
Chris@42 438 T1m = KP559016994 * (TW - TZ);
Chris@42 439 T10 = TW + TZ;
Chris@42 440 T1l = FNMS(KP250000000, T10, TT);
Chris@42 441 T1q = T1o - T1p;
Chris@42 442 T1t = T1r - T1s;
Chris@42 443 T1u = FNMS(KP587785252, T1t, KP951056516 * T1q);
Chris@42 444 T1w = FMA(KP951056516, T1t, KP587785252 * T1q);
Chris@42 445 cr[0] = TT + T10;
Chris@42 446 T1v = T1m + T1l;
Chris@42 447 cr[WS(rs, 4)] = T1v - T1w;
Chris@42 448 ci[WS(rs, 3)] = T1v + T1w;
Chris@42 449 T1n = T1l - T1m;
Chris@42 450 cr[WS(rs, 2)] = T1n - T1u;
Chris@42 451 ci[WS(rs, 1)] = T1n + T1u;
Chris@42 452 }
/* Output group C: stores cr[5], cr[9], ci[8], cr[7], ci[6]. */
Chris@42 453 {
Chris@42 454 E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
Chris@42 455 T1H = KP559016994 * (T1y + T1x);
Chris@42 456 T1z = T1x - T1y;
Chris@42 457 T1G = FMA(KP250000000, T1z, T1C);
Chris@42 458 T1D = Ti - Tt;
Chris@42 459 T1E = TQ - TF;
Chris@42 460 T1F = FMA(KP587785252, T1D, KP951056516 * T1E);
Chris@42 461 T1J = FNMS(KP951056516, T1D, KP587785252 * T1E);
Chris@42 462 cr[WS(rs, 5)] = T1z - T1C;
Chris@42 463 T1K = T1H + T1G;
Chris@42 464 cr[WS(rs, 9)] = T1J - T1K;
Chris@42 465 ci[WS(rs, 8)] = T1J + T1K;
Chris@42 466 T1I = T1G - T1H;
Chris@42 467 cr[WS(rs, 7)] = T1F - T1I;
Chris@42 468 ci[WS(rs, 6)] = T1F + T1I;
Chris@42 469 }
/* Output group D: stores ci[9], cr[8], ci[7], cr[6], ci[5]. */
Chris@42 470 {
Chris@42 471 E T1Q, T1S, T1T, T1N, T1V, T1L, T1M, T1W, T1U;
Chris@42 472 T1Q = KP559016994 * (T1O - T1P);
Chris@42 473 T1S = T1O + T1P;
Chris@42 474 T1T = FNMS(KP250000000, T1S, T1R);
Chris@42 475 T1L = TU - TV;
Chris@42 476 T1M = TX - TY;
Chris@42 477 T1N = FMA(KP951056516, T1L, KP587785252 * T1M);
Chris@42 478 T1V = FNMS(KP587785252, T1L, KP951056516 * T1M);
Chris@42 479 ci[WS(rs, 9)] = T1S + T1R;
Chris@42 480 T1W = T1T - T1Q;
Chris@42 481 cr[WS(rs, 8)] = T1V - T1W;
Chris@42 482 ci[WS(rs, 7)] = T1V + T1W;
Chris@42 483 T1U = T1Q + T1T;
Chris@42 484 cr[WS(rs, 6)] = T1N - T1U;
Chris@42 485 ci[WS(rs, 5)] = T1N + T1U;
Chris@42 486 }
Chris@42 487 }
Chris@42 488 }
Chris@42 489 }
Chris@42 490
/*
 * Twiddle instruction table referenced by desc in this (non-FMA) branch.
 * It holds one TW_FULL entry with operands 1 and 10, followed by a TW_NEXT
 * entry with operands 1 and 0.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared in headers not
 * shown here; presumably TW_NEXT terminates the list -- confirm.
 */
Chris@42 491 static const tw_instr twinstr[] = {
Chris@42 492 {TW_FULL, 1, 10},
Chris@42 493 {TW_NEXT, 1, 0}
Chris@42 494 };
Chris@42 495
/*
 * Descriptor passed to X(khc2hc_register) together with hf_10 (non-FMA branch):
 * the value 10, the name string "hf_10", the twinstr table, &GENUS, and the
 * tuple {72, 30, 30, 0}. The first three tuple entries repeat the operation
 * counts stated in this variant's header comment (72 additions,
 * 30 multiplications, 30 fused multiply/add); the meaning of the fourth entry
 * is not visible in this file.
 */
Chris@42 496 static const hc2hc_desc desc = { 10, "hf_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@42 497
/*
 * Registration entry point (non-FMA branch): hands hf_10 and its descriptor
 * desc to X(khc2hc_register) for planner p.
 * NOTE(review): X() is a macro from headers not shown; presumably it applies
 * the library's symbol prefix -- confirm.
 */
Chris@42 498 void X(codelet_hf_10) (planner *p) {
Chris@42 499 X(khc2hc_register) (p, hf_10, &desc);
Chris@42 500 }
Chris@42 501 #endif /* HAVE_FMA */