annotate src/fftw-3.3.5/rdft/scalar/r2cb/hb_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:44 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hb_10 -include hb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@42 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@42 33 * 71 stack variables, 4 constants, and 40 memory accesses
Chris@42 34 */
Chris@42 35 #include "hb.h"
Chris@42 36
/*
 * hb_10 (HAVE_FMA variant): generated codelet (genfft gen_hc2hc, -n 10 -dif
 * -sign 1, see the "Generated by" comment above).
 *
 * For each m in [mb, me) it loads the ten cr[WS(rs, k)] and the ten
 * ci[WS(rs, k)] values (k = 0..9) into temporaries, combines them, and stores
 * the results back into the same twenty slots, i.e. it works in place. All
 * twenty loads are issued before the first store.
 *
 * Per iteration cr advances by ms, ci moves back by ms and W advances by 18.
 * W is first offset by (mb - 1) * 18, so iteration m reads the 18 values
 * starting at (m - 1) * 18 of the W array passed in.
 *
 * cr[0] and ci[0] are stored without any W factor. Every other output slot
 * k = 1..9 is formed from the pair W[2k - 2], W[2k - 1].
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * come from headers not shown here. FMA(a, b, c) / FNMS(a, b, c) are presumed
 * to mean a*b + c / c - a*b -- confirm against the project headers.
 */
Chris@42 37 static void hb_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numerically sin(2*pi/5). */
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
/* Numerically sqrt(5)/4. */
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
/* Numerically (sqrt(5) - 1)/2. */
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 43 {
Chris@42 44 INT m;
Chris@42 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
/* Declared at loop scope because the last store (cr slot 3) sits outside the inner block. */
Chris@42 46 E T21, T1Y, T1X;
Chris@42 47 {
Chris@42 48 E T1B, TH, T1g, T3, T1V, T1x, T1G, T1E, TM, TK, T11, TB, T7, T1m, T1J;
Chris@42 49 E TO, Th, T1h, T6, T8, TF, TG, T1i, T9;
Chris@42 50 TF = ci[WS(rs, 9)];
Chris@42 51 TG = cr[WS(rs, 5)];
Chris@42 52 {
Chris@42 53 E T1u, Tp, Tu, T1s, Tz, T1v, Ts, Tv;
Chris@42 54 {
/* Loads from slots 5..9 of cr and ci, paired into sums and differences. */
Chris@42 55 E Tx, Ty, Tn, To, Tq, Tr;
Chris@42 56 Tn = ci[WS(rs, 5)];
Chris@42 57 To = cr[WS(rs, 9)];
Chris@42 58 Tx = ci[WS(rs, 6)];
Chris@42 59 T1B = TF + TG;
Chris@42 60 TH = TF - TG;
Chris@42 61 T1u = Tn + To;
Chris@42 62 Tp = Tn - To;
Chris@42 63 Ty = cr[WS(rs, 8)];
Chris@42 64 Tq = ci[WS(rs, 8)];
Chris@42 65 Tr = cr[WS(rs, 6)];
Chris@42 66 Tu = ci[WS(rs, 7)];
Chris@42 67 T1s = Tx + Ty;
Chris@42 68 Tz = Tx - Ty;
Chris@42 69 T1v = Tq + Tr;
Chris@42 70 Ts = Tq - Tr;
Chris@42 71 Tv = cr[WS(rs, 7)];
Chris@42 72 }
Chris@42 73 {
/* Loads from slots 0..4 of cr and ci start here. */
Chris@42 74 E T1, T1w, T1D, TJ, Tt, T1r, Tw, T2;
Chris@42 75 T1 = cr[0];
Chris@42 76 T1w = T1u + T1v;
Chris@42 77 T1D = T1u - T1v;
Chris@42 78 TJ = Tp + Ts;
Chris@42 79 Tt = Tp - Ts;
Chris@42 80 T1r = Tu + Tv;
Chris@42 81 Tw = Tu - Tv;
Chris@42 82 T2 = ci[WS(rs, 4)];
Chris@42 83 {
Chris@42 84 E Tb, Tc, Te, Tf;
Chris@42 85 Tb = cr[WS(rs, 4)];
Chris@42 86 {
Chris@42 87 E T1t, T1C, TI, TA;
Chris@42 88 T1t = T1r + T1s;
Chris@42 89 T1C = T1r - T1s;
Chris@42 90 TI = Tw + Tz;
Chris@42 91 TA = Tw - Tz;
Chris@42 92 T1g = T1 - T2;
Chris@42 93 T3 = T1 + T2;
Chris@42 94 T1V = FNMS(KP618033988, T1t, T1w);
Chris@42 95 T1x = FMA(KP618033988, T1w, T1t);
Chris@42 96 T1G = T1C - T1D;
Chris@42 97 T1E = T1C + T1D;
Chris@42 98 TM = TI - TJ;
Chris@42 99 TK = TI + TJ;
Chris@42 100 T11 = FMA(KP618033988, Tt, TA);
Chris@42 101 TB = FNMS(KP618033988, TA, Tt);
Chris@42 102 Tc = ci[0];
Chris@42 103 }
Chris@42 104 Te = ci[WS(rs, 3)];
Chris@42 105 Tf = cr[WS(rs, 1)];
Chris@42 106 {
Chris@42 107 E T4, T1k, Td, T1l, Tg, T5;
Chris@42 108 T4 = cr[WS(rs, 2)];
Chris@42 109 T1k = Tb - Tc;
Chris@42 110 Td = Tb + Tc;
Chris@42 111 T1l = Te - Tf;
Chris@42 112 Tg = Te + Tf;
Chris@42 113 T5 = ci[WS(rs, 2)];
Chris@42 114 T7 = ci[WS(rs, 1)];
Chris@42 115 T1m = T1k + T1l;
Chris@42 116 T1J = T1k - T1l;
Chris@42 117 TO = Td - Tg;
Chris@42 118 Th = Td + Tg;
Chris@42 119 T1h = T4 - T5;
Chris@42 120 T6 = T4 + T5;
/* Last of the twenty loads; every store below overwrites an already-read slot. */
Chris@42 121 T8 = cr[WS(rs, 3)];
Chris@42 122 }
Chris@42 123 }
Chris@42 124 }
Chris@42 125 }
/* First store: ci slot 0 gets no W factor. */
Chris@42 126 ci[0] = TH + TK;
Chris@42 127 T1i = T7 - T8;
Chris@42 128 T9 = T7 + T8;
Chris@42 129 {
Chris@42 130 E T2d, T1F, T29, T1I, TP, T2c, T1p, Tl, T1o, Tk, T2b, T2e, T17, T14, T13;
Chris@42 131 T2d = T1B + T1E;
Chris@42 132 T1F = FNMS(KP250000000, T1E, T1B);
Chris@42 133 {
Chris@42 134 E T1j, Ta, T1n, Ti, T2a;
Chris@42 135 T29 = W[8];
Chris@42 136 T1I = T1h - T1i;
Chris@42 137 T1j = T1h + T1i;
Chris@42 138 TP = T6 - T9;
Chris@42 139 Ta = T6 + T9;
Chris@42 140 T2c = W[9];
Chris@42 141 T1p = T1j - T1m;
Chris@42 142 T1n = T1j + T1m;
Chris@42 143 Tl = Ta - Th;
Chris@42 144 Ti = Ta + Th;
Chris@42 145 T1o = FNMS(KP250000000, T1n, T1g);
Chris@42 146 T2a = T1g + T1n;
/* cr slot 0 is likewise stored without a W factor. */
Chris@42 147 cr[0] = T3 + Ti;
Chris@42 148 Tk = FNMS(KP250000000, Ti, T3);
Chris@42 149 T2b = T29 * T2a;
Chris@42 150 T2e = T2c * T2a;
Chris@42 151 }
Chris@42 152 {
Chris@42 153 E T16, TQ, T10, Tm, TL;
Chris@42 154 T16 = FMA(KP618033988, TO, TP);
Chris@42 155 TQ = FNMS(KP618033988, TP, TO);
/* Output slot 5, using W[8] and W[9]. */
Chris@42 156 cr[WS(rs, 5)] = FNMS(T2c, T2d, T2b);
Chris@42 157 ci[WS(rs, 5)] = FMA(T29, T2d, T2e);
Chris@42 158 T10 = FMA(KP559016994, Tl, Tk);
Chris@42 159 Tm = FNMS(KP559016994, Tl, Tk);
Chris@42 160 TL = FNMS(KP250000000, TK, TH);
Chris@42 161 {
/* Output slots 2, 6, 8 and 4 (W[2..3], W[10..11], W[14..15], W[6..7]). */
Chris@42 162 E TE, TU, T12, TR, TX, T1d, T1c, T19, TD, T1e, T1b, TW, TT;
Chris@42 163 {
Chris@42 164 E TC, T15, T1a, TS, Tj, TN;
Chris@42 165 TE = W[3];
Chris@42 166 TC = FMA(KP951056516, TB, Tm);
Chris@42 167 TU = FNMS(KP951056516, TB, Tm);
Chris@42 168 TN = FNMS(KP559016994, TM, TL);
Chris@42 169 T15 = FMA(KP559016994, TM, TL);
Chris@42 170 T12 = FMA(KP951056516, T11, T10);
Chris@42 171 T1a = FNMS(KP951056516, T11, T10);
Chris@42 172 TS = TE * TC;
Chris@42 173 TR = FNMS(KP951056516, TQ, TN);
Chris@42 174 TX = FMA(KP951056516, TQ, TN);
Chris@42 175 Tj = W[2];
Chris@42 176 T1d = FMA(KP951056516, T16, T15);
Chris@42 177 T17 = FNMS(KP951056516, T16, T15);
Chris@42 178 T1c = W[11];
Chris@42 179 T19 = W[10];
Chris@42 180 ci[WS(rs, 2)] = FMA(Tj, TR, TS);
Chris@42 181 TD = Tj * TC;
Chris@42 182 T1e = T1c * T1a;
Chris@42 183 T1b = T19 * T1a;
Chris@42 184 }
Chris@42 185 cr[WS(rs, 2)] = FNMS(TE, TR, TD);
Chris@42 186 ci[WS(rs, 6)] = FMA(T19, T1d, T1e);
Chris@42 187 cr[WS(rs, 6)] = FNMS(T1c, T1d, T1b);
Chris@42 188 TW = W[15];
Chris@42 189 TT = W[14];
Chris@42 190 {
Chris@42 191 E TZ, T18, TY, TV;
Chris@42 192 T14 = W[7];
Chris@42 193 TY = TW * TU;
Chris@42 194 TV = TT * TU;
Chris@42 195 TZ = W[6];
Chris@42 196 T18 = T14 * T12;
Chris@42 197 ci[WS(rs, 8)] = FMA(TT, TX, TY);
Chris@42 198 cr[WS(rs, 8)] = FNMS(TW, TX, TV);
Chris@42 199 T13 = TZ * T12;
Chris@42 200 ci[WS(rs, 4)] = FMA(TZ, T17, T18);
Chris@42 201 }
Chris@42 202 }
Chris@42 203 }
Chris@42 204 {
Chris@42 205 E T20, T1K, T1q, T1U;
Chris@42 206 T20 = FNMS(KP618033988, T1I, T1J);
Chris@42 207 T1K = FMA(KP618033988, T1J, T1I);
/* cr slot 4 is stored here, after its ci counterpart, using T14/T17/T13 from the enclosing block. */
Chris@42 208 cr[WS(rs, 4)] = FNMS(T14, T17, T13);
Chris@42 209 T1q = FMA(KP559016994, T1p, T1o);
Chris@42 210 T1U = FNMS(KP559016994, T1p, T1o);
Chris@42 211 {
/* Output slots 1, 7, 9 and 3 (W[0..1], W[12..13], W[16..17], W[4..5]). */
Chris@42 212 E T1A, T1O, T1W, T1R, T1L, T27, T26, T23, T1z, T28, T25, T1Q, T1N;
Chris@42 213 {
Chris@42 214 E T1y, T1Z, T24, T1M, T1f, T1H;
Chris@42 215 T1A = W[1];
Chris@42 216 T1O = FMA(KP951056516, T1x, T1q);
Chris@42 217 T1y = FNMS(KP951056516, T1x, T1q);
Chris@42 218 T1Z = FNMS(KP559016994, T1G, T1F);
Chris@42 219 T1H = FMA(KP559016994, T1G, T1F);
Chris@42 220 T24 = FMA(KP951056516, T1V, T1U);
Chris@42 221 T1W = FNMS(KP951056516, T1V, T1U);
Chris@42 222 T1M = T1A * T1y;
Chris@42 223 T1R = FNMS(KP951056516, T1K, T1H);
Chris@42 224 T1L = FMA(KP951056516, T1K, T1H);
Chris@42 225 T1f = W[0];
Chris@42 226 T21 = FMA(KP951056516, T20, T1Z);
Chris@42 227 T27 = FNMS(KP951056516, T20, T1Z);
Chris@42 228 T26 = W[13];
Chris@42 229 T23 = W[12];
Chris@42 230 ci[WS(rs, 1)] = FMA(T1f, T1L, T1M);
Chris@42 231 T1z = T1f * T1y;
Chris@42 232 T28 = T26 * T24;
Chris@42 233 T25 = T23 * T24;
Chris@42 234 }
Chris@42 235 cr[WS(rs, 1)] = FNMS(T1A, T1L, T1z);
Chris@42 236 ci[WS(rs, 7)] = FMA(T23, T27, T28);
Chris@42 237 cr[WS(rs, 7)] = FNMS(T26, T27, T25);
Chris@42 238 T1Q = W[17];
Chris@42 239 T1N = W[16];
Chris@42 240 {
Chris@42 241 E T1T, T22, T1S, T1P;
Chris@42 242 T1Y = W[5];
Chris@42 243 T1S = T1Q * T1O;
Chris@42 244 T1P = T1N * T1O;
Chris@42 245 T1T = W[4];
Chris@42 246 T22 = T1Y * T1W;
Chris@42 247 ci[WS(rs, 9)] = FMA(T1N, T1R, T1S);
Chris@42 248 cr[WS(rs, 9)] = FNMS(T1Q, T1R, T1P);
Chris@42 249 T1X = T1T * T1W;
Chris@42 250 ci[WS(rs, 3)] = FMA(T1T, T21, T22);
Chris@42 251 }
Chris@42 252 }
Chris@42 253 }
Chris@42 254 }
Chris@42 255 }
/* Final store of the iteration; relies on the loop-scope T1Y, T21 and T1X. */
Chris@42 256 cr[WS(rs, 3)] = FNMS(T1Y, T21, T1X);
Chris@42 257 }
Chris@42 258 }
Chris@42 259 }
Chris@42 260
/*
 * Twiddle instruction table referenced by desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared elsewhere; this
 * presumably requests the full twiddle set for size 10 followed by a
 * terminator entry, which would agree with hb_10 consuming 18 W values per
 * iteration -- confirm against the tw_instr definition.
 */
Chris@42 261 static const tw_instr twinstr[] = {
Chris@42 262 {TW_FULL, 1, 10},
Chris@42 263 {TW_NEXT, 1, 0}
Chris@42 264 };
Chris@42 265
/*
 * Descriptor passed to the registration call below. 10 and "hb_10" match the
 * generator options -n 10 -name hb_10; {48, 18, 54, 0} repeats the 48
 * additions, 18 multiplications and 54 fused multiply/adds quoted in the
 * generator comment for this HAVE_FMA variant.
 * NOTE(review): field meanings come from hc2hc_desc, and GENUS from hb.h,
 * neither of which is shown here.
 */
Chris@42 266 static const hc2hc_desc desc = { 10, "hb_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@42 267
/*
 * Registration entry point: hands the hb_10 kernel and its descriptor to the
 * planner p via khc2hc_register.
 * NOTE(review): X() is a project macro applied to both identifiers
 * (presumably a name-prefixing macro) -- confirm in the project headers.
 */
Chris@42 268 void X(codelet_hb_10) (planner *p) {
Chris@42 269 X(khc2hc_register) (p, hb_10, &desc);
Chris@42 270 }
Chris@42 271 #else /* HAVE_FMA */
Chris@42 272
Chris@42 273 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hb_10 -include hb.h */
Chris@42 274
Chris@42 275 /*
Chris@42 276 * This function contains 102 FP additions, 60 FP multiplications,
Chris@42 277 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@42 278 * 41 stack variables, 4 constants, and 40 memory accesses
Chris@42 279 */
Chris@42 280 #include "hb.h"
Chris@42 281
/*
 * hb_10 (non-FMA variant): generated codelet (genfft gen_hc2hc, -n 10 -dif
 * -sign 1, see the "Generated by" comment above). Same interface as the
 * HAVE_FMA variant; only one of the two is compiled.
 *
 * For each m in [mb, me) it loads the ten cr[WS(rs, k)] and the ten
 * ci[WS(rs, k)] values (k = 0..9) into temporaries, combines them, and stores
 * the results back into the same twenty slots, i.e. it works in place. All
 * twenty loads are issued before the first store.
 *
 * Per iteration cr advances by ms, ci moves back by ms and W advances by 18.
 * W is first offset by (mb - 1) * 18, so iteration m reads the 18 values
 * starting at (m - 1) * 18 of the W array passed in.
 *
 * cr[0] and ci[0] are stored without any W factor. Every other output slot
 * k = 1..9 is formed from the pair W[2k - 2], W[2k - 1].
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * come from headers not shown here. FMA(a, b, c) / FNMS(a, b, c) are presumed
 * to mean a*b + c / c - a*b -- confirm against the project headers.
 */
Chris@42 282 static void hb_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 283 {
Chris@42 284 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
/* Numerically sin(2*pi/5). */
Chris@42 285 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
/* Numerically sin(pi/5). */
Chris@42 286 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
/* Numerically sqrt(5)/4. */
Chris@42 287 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 288 {
Chris@42 289 INT m;
Chris@42 290 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@42 291 E T3, T18, TE, TF, T1B, T1A, T1f, T1t, Ti, Tl, TJ, T1i, Tt, TA, T1w;
Chris@42 292 E T1v, T1p, T1E, TM, TO;
Chris@42 293 {
/* Loads from slots 0..4 of cr and ci, reduced to sums and differences. */
Chris@42 294 E T1, T2, TH, TI;
Chris@42 295 T1 = cr[0];
Chris@42 296 T2 = ci[WS(rs, 4)];
Chris@42 297 T3 = T1 + T2;
Chris@42 298 T18 = T1 - T2;
Chris@42 299 {
Chris@42 300 E T6, T19, Tg, T1d, T9, T1a, Td, T1c;
Chris@42 301 {
Chris@42 302 E T4, T5, Te, Tf;
Chris@42 303 T4 = cr[WS(rs, 2)];
Chris@42 304 T5 = ci[WS(rs, 2)];
Chris@42 305 T6 = T4 + T5;
Chris@42 306 T19 = T4 - T5;
Chris@42 307 Te = ci[WS(rs, 3)];
Chris@42 308 Tf = cr[WS(rs, 1)];
Chris@42 309 Tg = Te + Tf;
Chris@42 310 T1d = Te - Tf;
Chris@42 311 }
Chris@42 312 {
Chris@42 313 E T7, T8, Tb, Tc;
Chris@42 314 T7 = ci[WS(rs, 1)];
Chris@42 315 T8 = cr[WS(rs, 3)];
Chris@42 316 T9 = T7 + T8;
Chris@42 317 T1a = T7 - T8;
Chris@42 318 Tb = cr[WS(rs, 4)];
Chris@42 319 Tc = ci[0];
Chris@42 320 Td = Tb + Tc;
Chris@42 321 T1c = Tb - Tc;
Chris@42 322 }
Chris@42 323 TE = T6 - T9;
Chris@42 324 TF = Td - Tg;
Chris@42 325 T1B = T1c - T1d;
Chris@42 326 T1A = T19 - T1a;
Chris@42 327 {
Chris@42 328 E T1b, T1e, Ta, Th;
Chris@42 329 T1b = T19 + T1a;
Chris@42 330 T1e = T1c + T1d;
Chris@42 331 T1f = T1b + T1e;
Chris@42 332 T1t = KP559016994 * (T1b - T1e);
Chris@42 333 Ta = T6 + T9;
Chris@42 334 Th = Td + Tg;
Chris@42 335 Ti = Ta + Th;
Chris@42 336 Tl = KP559016994 * (Ta - Th);
Chris@42 337 }
Chris@42 338 }
/* Loads from slots 5..9 of cr and ci, reduced the same way. */
Chris@42 339 TH = ci[WS(rs, 9)];
Chris@42 340 TI = cr[WS(rs, 5)];
Chris@42 341 TJ = TH - TI;
Chris@42 342 T1i = TH + TI;
Chris@42 343 {
Chris@42 344 E Tp, T1j, Tz, T1n, Ts, T1k, Tw, T1m;
Chris@42 345 {
Chris@42 346 E Tn, To, Tx, Ty;
Chris@42 347 Tn = ci[WS(rs, 7)];
Chris@42 348 To = cr[WS(rs, 7)];
Chris@42 349 Tp = Tn - To;
Chris@42 350 T1j = Tn + To;
Chris@42 351 Tx = ci[WS(rs, 8)];
Chris@42 352 Ty = cr[WS(rs, 6)];
Chris@42 353 Tz = Tx - Ty;
Chris@42 354 T1n = Tx + Ty;
Chris@42 355 }
Chris@42 356 {
Chris@42 357 E Tq, Tr, Tu, Tv;
Chris@42 358 Tq = ci[WS(rs, 6)];
Chris@42 359 Tr = cr[WS(rs, 8)];
Chris@42 360 Ts = Tq - Tr;
Chris@42 361 T1k = Tq + Tr;
Chris@42 362 Tu = ci[WS(rs, 5)];
Chris@42 363 Tv = cr[WS(rs, 9)];
Chris@42 364 Tw = Tu - Tv;
Chris@42 365 T1m = Tu + Tv;
Chris@42 366 }
Chris@42 367 Tt = Tp - Ts;
Chris@42 368 TA = Tw - Tz;
Chris@42 369 T1w = T1m + T1n;
Chris@42 370 T1v = T1j + T1k;
Chris@42 371 {
Chris@42 372 E T1l, T1o, TK, TL;
Chris@42 373 T1l = T1j - T1k;
Chris@42 374 T1o = T1m - T1n;
Chris@42 375 T1p = T1l + T1o;
Chris@42 376 T1E = KP559016994 * (T1l - T1o);
Chris@42 377 TK = Tp + Ts;
Chris@42 378 TL = Tw + Tz;
Chris@42 379 TM = TK + TL;
Chris@42 380 TO = KP559016994 * (TK - TL);
Chris@42 381 }
Chris@42 382 }
Chris@42 383 }
/* All twenty inputs are in temporaries by now; slot 0 is stored without a W factor. */
Chris@42 384 cr[0] = T3 + Ti;
Chris@42 385 ci[0] = TJ + TM;
Chris@42 386 {
/* Output slot 5, using W[8] and W[9]. */
Chris@42 387 E T1g, T1q, T17, T1h;
Chris@42 388 T1g = T18 + T1f;
Chris@42 389 T1q = T1i + T1p;
Chris@42 390 T17 = W[8];
Chris@42 391 T1h = W[9];
Chris@42 392 cr[WS(rs, 5)] = FNMS(T1h, T1q, T17 * T1g);
Chris@42 393 ci[WS(rs, 5)] = FMA(T1h, T1g, T17 * T1q);
Chris@42 394 }
Chris@42 395 {
/* Output slots 2, 6, 8 and 4 (W[2..3], W[10..11], W[14..15], W[6..7]). */
Chris@42 396 E TB, TG, T11, TX, TP, T10, Tm, TW, TN, Tk;
Chris@42 397 TB = FNMS(KP951056516, TA, KP587785252 * Tt);
Chris@42 398 TG = FNMS(KP951056516, TF, KP587785252 * TE);
Chris@42 399 T11 = FMA(KP951056516, TE, KP587785252 * TF);
Chris@42 400 TX = FMA(KP951056516, Tt, KP587785252 * TA);
Chris@42 401 TN = FNMS(KP250000000, TM, TJ);
Chris@42 402 TP = TN - TO;
Chris@42 403 T10 = TO + TN;
Chris@42 404 Tk = FNMS(KP250000000, Ti, T3);
Chris@42 405 Tm = Tk - Tl;
Chris@42 406 TW = Tl + Tk;
Chris@42 407 {
Chris@42 408 E TC, TQ, Tj, TD;
Chris@42 409 TC = Tm - TB;
Chris@42 410 TQ = TG + TP;
Chris@42 411 Tj = W[2];
Chris@42 412 TD = W[3];
Chris@42 413 cr[WS(rs, 2)] = FNMS(TD, TQ, Tj * TC);
Chris@42 414 ci[WS(rs, 2)] = FMA(TD, TC, Tj * TQ);
Chris@42 415 }
Chris@42 416 {
Chris@42 417 E T14, T16, T13, T15;
Chris@42 418 T14 = TW - TX;
Chris@42 419 T16 = T11 + T10;
Chris@42 420 T13 = W[10];
Chris@42 421 T15 = W[11];
Chris@42 422 cr[WS(rs, 6)] = FNMS(T15, T16, T13 * T14);
Chris@42 423 ci[WS(rs, 6)] = FMA(T15, T14, T13 * T16);
Chris@42 424 }
Chris@42 425 {
Chris@42 426 E TS, TU, TR, TT;
Chris@42 427 TS = Tm + TB;
Chris@42 428 TU = TP - TG;
Chris@42 429 TR = W[14];
Chris@42 430 TT = W[15];
Chris@42 431 cr[WS(rs, 8)] = FNMS(TT, TU, TR * TS);
Chris@42 432 ci[WS(rs, 8)] = FMA(TT, TS, TR * TU);
Chris@42 433 }
Chris@42 434 {
Chris@42 435 E TY, T12, TV, TZ;
Chris@42 436 TY = TW + TX;
Chris@42 437 T12 = T10 - T11;
Chris@42 438 TV = W[6];
Chris@42 439 TZ = W[7];
Chris@42 440 cr[WS(rs, 4)] = FNMS(TZ, T12, TV * TY);
Chris@42 441 ci[WS(rs, 4)] = FMA(TZ, TY, TV * T12);
Chris@42 442 }
Chris@42 443 }
Chris@42 444 {
/*
 * Output slots 7, 9, 3 and 1 (W[12..13], W[16..17], W[4..5], W[0..1]).
 * The ci stores here list their operands in a different order from the
 * even-slot group above but pair the same W element with the same kind of
 * term (first W value with the second temporary, second W value with the
 * first).
 */
Chris@42 445 E T1x, T1C, T1Q, T1N, T1F, T1R, T1u, T1M, T1D, T1s;
Chris@42 446 T1x = FNMS(KP951056516, T1w, KP587785252 * T1v);
Chris@42 447 T1C = FNMS(KP951056516, T1B, KP587785252 * T1A);
Chris@42 448 T1Q = FMA(KP951056516, T1A, KP587785252 * T1B);
Chris@42 449 T1N = FMA(KP951056516, T1v, KP587785252 * T1w);
Chris@42 450 T1D = FNMS(KP250000000, T1p, T1i);
Chris@42 451 T1F = T1D - T1E;
Chris@42 452 T1R = T1E + T1D;
Chris@42 453 T1s = FNMS(KP250000000, T1f, T18);
Chris@42 454 T1u = T1s - T1t;
Chris@42 455 T1M = T1t + T1s;
Chris@42 456 {
Chris@42 457 E T1y, T1G, T1r, T1z;
Chris@42 458 T1y = T1u - T1x;
Chris@42 459 T1G = T1C + T1F;
Chris@42 460 T1r = W[12];
Chris@42 461 T1z = W[13];
Chris@42 462 cr[WS(rs, 7)] = FNMS(T1z, T1G, T1r * T1y);
Chris@42 463 ci[WS(rs, 7)] = FMA(T1r, T1G, T1z * T1y);
Chris@42 464 }
Chris@42 465 {
Chris@42 466 E T1U, T1W, T1T, T1V;
Chris@42 467 T1U = T1M + T1N;
Chris@42 468 T1W = T1R - T1Q;
Chris@42 469 T1T = W[16];
Chris@42 470 T1V = W[17];
Chris@42 471 cr[WS(rs, 9)] = FNMS(T1V, T1W, T1T * T1U);
Chris@42 472 ci[WS(rs, 9)] = FMA(T1T, T1W, T1V * T1U);
Chris@42 473 }
Chris@42 474 {
Chris@42 475 E T1I, T1K, T1H, T1J;
Chris@42 476 T1I = T1u + T1x;
Chris@42 477 T1K = T1F - T1C;
Chris@42 478 T1H = W[4];
Chris@42 479 T1J = W[5];
Chris@42 480 cr[WS(rs, 3)] = FNMS(T1J, T1K, T1H * T1I);
Chris@42 481 ci[WS(rs, 3)] = FMA(T1H, T1K, T1J * T1I);
Chris@42 482 }
Chris@42 483 {
Chris@42 484 E T1O, T1S, T1L, T1P;
Chris@42 485 T1O = T1M - T1N;
Chris@42 486 T1S = T1Q + T1R;
Chris@42 487 T1L = W[0];
Chris@42 488 T1P = W[1];
Chris@42 489 cr[WS(rs, 1)] = FNMS(T1P, T1S, T1L * T1O);
Chris@42 490 ci[WS(rs, 1)] = FMA(T1L, T1S, T1P * T1O);
Chris@42 491 }
Chris@42 492 }
Chris@42 493 }
Chris@42 494 }
Chris@42 495 }
Chris@42 496
/*
 * Twiddle instruction table referenced by desc below (non-FMA build).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared elsewhere; this
 * presumably requests the full twiddle set for size 10 followed by a
 * terminator entry, which would agree with hb_10 consuming 18 W values per
 * iteration -- confirm against the tw_instr definition.
 */
Chris@42 497 static const tw_instr twinstr[] = {
Chris@42 498 {TW_FULL, 1, 10},
Chris@42 499 {TW_NEXT, 1, 0}
Chris@42 500 };
Chris@42 501
/*
 * Descriptor passed to the registration call below. 10 and "hb_10" match the
 * generator options -n 10 -name hb_10; {72, 30, 30, 0} repeats the 72
 * additions, 30 multiplications and 30 fused multiply/adds quoted in the
 * generator comment for this non-FMA variant.
 * NOTE(review): field meanings come from hc2hc_desc, and GENUS from hb.h,
 * neither of which is shown here.
 */
Chris@42 502 static const hc2hc_desc desc = { 10, "hb_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@42 503
/*
 * Registration entry point (non-FMA build): hands the hb_10 kernel and its
 * descriptor to the planner p via khc2hc_register.
 * NOTE(review): X() is a project macro applied to both identifiers
 * (presumably a name-prefixing macro) -- confirm in the project headers.
 */
Chris@42 504 void X(codelet_hb_10) (planner *p) {
Chris@42 505 X(khc2hc_register) (p, hb_10, &desc);
Chris@42 506 }
Chris@42 507 #endif /* HAVE_FMA */