annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:32 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hb_10 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@82 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@82 33 * 47 stack variables, 4 constants, and 40 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb_10 (FMA variant): in-place size-10 twiddle codelet emitted by genfft
 * (gen_hc2hc with -n 10 -dif -sign 1, per the generator command line recorded
 * earlier in this file).
 *
 * cr, ci: the two data arrays. Each pass reads elements 0..9 of both at
 *         stride rs (through WS(rs, k)) and overwrites the same 20 slots.
 * W:      twiddle table. 18 values are consumed per pass, as the pairs
 *         W[2k-2], W[2k-1] for output index k = 1..9; index 0 is written
 *         without any twiddle factor.
 * mb, me: half-open range [mb, me) of the pass counter m.
 * ms:     per-pass step; cr advances by +ms while ci moves by -ms.
 *
 * FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE come from the included FFTW
 * headers and are not defined in this file. Presumably FMA(a, b, c) is
 * a*b + c and FNMS(a, b, c) is c - a*b -- TODO confirm against the headers.
 */
Chris@82 37 static void hb_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/*
 * Numeric values of the constants: KP951056516 ~ sin(2*pi/5),
 * KP559016994 ~ sqrt(5)/4, KP250000000 = 1/4, KP618033988 ~ (sqrt(5) - 1)/2.
 */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
/*
 * W is first advanced by (mb - 1) * 18 and then by 18 per pass, so pass m
 * uses the 18 entries starting at offset (m - 1) * 18 from the incoming W.
 */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@82 46 E TH, T1B, TB, T11, T1E, T1G, TK, TM, T1x, T1V, T3, T1g, Tl, T1I, T1J;
Chris@82 47 E TO, TP, T1p, Ti, Tk, T1n, T1o, TF, TG;
/* Difference and sum of ci[9] and cr[5]. */
Chris@82 48 TF = ci[WS(rs, 9)];
Chris@82 49 TG = cr[WS(rs, 5)];
Chris@82 50 TH = TF - TG;
Chris@82 51 T1B = TF + TG;
/*
 * Remaining inputs with indices 5..9: each ci/cr pair yields a difference
 * and a sum, which are then combined into the intermediates used below.
 */
Chris@82 52 {
Chris@82 53 E Tp, T1u, Tz, T1s, Ts, T1v, Tw, T1r;
Chris@82 54 {
Chris@82 55 E Tn, To, Tx, Ty;
Chris@82 56 Tn = ci[WS(rs, 5)];
Chris@82 57 To = cr[WS(rs, 9)];
Chris@82 58 Tp = Tn - To;
Chris@82 59 T1u = Tn + To;
Chris@82 60 Tx = ci[WS(rs, 6)];
Chris@82 61 Ty = cr[WS(rs, 8)];
Chris@82 62 Tz = Tx - Ty;
Chris@82 63 T1s = Tx + Ty;
Chris@82 64 }
Chris@82 65 {
Chris@82 66 E Tq, Tr, Tu, Tv;
Chris@82 67 Tq = ci[WS(rs, 8)];
Chris@82 68 Tr = cr[WS(rs, 6)];
Chris@82 69 Ts = Tq - Tr;
Chris@82 70 T1v = Tq + Tr;
Chris@82 71 Tu = ci[WS(rs, 7)];
Chris@82 72 Tv = cr[WS(rs, 7)];
Chris@82 73 Tw = Tu - Tv;
Chris@82 74 T1r = Tu + Tv;
Chris@82 75 }
Chris@82 76 {
Chris@82 77 E Tt, TA, T1C, T1D;
Chris@82 78 Tt = Tp - Ts;
Chris@82 79 TA = Tw - Tz;
Chris@82 80 TB = FNMS(KP618033988, TA, Tt);
Chris@82 81 T11 = FMA(KP618033988, Tt, TA);
Chris@82 82 T1C = T1r - T1s;
Chris@82 83 T1D = T1u - T1v;
Chris@82 84 T1E = T1C + T1D;
Chris@82 85 T1G = T1C - T1D;
Chris@82 86 }
Chris@82 87 {
Chris@82 88 E TI, TJ, T1t, T1w;
Chris@82 89 TI = Tw + Tz;
Chris@82 90 TJ = Tp + Ts;
Chris@82 91 TK = TI + TJ;
Chris@82 92 TM = TI - TJ;
Chris@82 93 T1t = T1r + T1s;
Chris@82 94 T1w = T1u + T1v;
Chris@82 95 T1x = FMA(KP618033988, T1w, T1t);
Chris@82 96 T1V = FNMS(KP618033988, T1t, T1w);
Chris@82 97 }
Chris@82 98 }
/*
 * Inputs with indices 0..4: each cr/ci pair yields a sum and a difference,
 * which are then combined into the intermediates used below.
 */
Chris@82 99 {
Chris@82 100 E Td, T1k, Tg, T1l, Th, T1m, T6, T1h, T9, T1i, Ta, T1j, T1, T2;
Chris@82 101 T1 = cr[0];
Chris@82 102 T2 = ci[WS(rs, 4)];
Chris@82 103 T3 = T1 + T2;
Chris@82 104 T1g = T1 - T2;
Chris@82 105 {
Chris@82 106 E Tb, Tc, Te, Tf;
Chris@82 107 Tb = cr[WS(rs, 4)];
Chris@82 108 Tc = ci[0];
Chris@82 109 Td = Tb + Tc;
Chris@82 110 T1k = Tb - Tc;
Chris@82 111 Te = ci[WS(rs, 3)];
Chris@82 112 Tf = cr[WS(rs, 1)];
Chris@82 113 Tg = Te + Tf;
Chris@82 114 T1l = Te - Tf;
Chris@82 115 }
Chris@82 116 Th = Td + Tg;
Chris@82 117 T1m = T1k + T1l;
Chris@82 118 {
Chris@82 119 E T4, T5, T7, T8;
Chris@82 120 T4 = cr[WS(rs, 2)];
Chris@82 121 T5 = ci[WS(rs, 2)];
Chris@82 122 T6 = T4 + T5;
Chris@82 123 T1h = T4 - T5;
Chris@82 124 T7 = ci[WS(rs, 1)];
Chris@82 125 T8 = cr[WS(rs, 3)];
Chris@82 126 T9 = T7 + T8;
Chris@82 127 T1i = T7 - T8;
Chris@82 128 }
Chris@82 129 Ta = T6 + T9;
Chris@82 130 T1j = T1h + T1i;
Chris@82 131 Tl = Ta - Th;
Chris@82 132 T1I = T1h - T1i;
Chris@82 133 T1J = T1k - T1l;
Chris@82 134 TO = Td - Tg;
Chris@82 135 TP = T6 - T9;
Chris@82 136 T1p = T1j - T1m;
Chris@82 137 Ti = Ta + Th;
Chris@82 138 Tk = FNMS(KP250000000, Ti, T3);
Chris@82 139 T1n = T1j + T1m;
Chris@82 140 T1o = FNMS(KP250000000, T1n, T1g);
Chris@82 141 }
/* Index 0 is stored as plain sums; no W entry is applied to it. */
Chris@82 142 cr[0] = T3 + Ti;
Chris@82 143 ci[0] = TH + TK;
/* Index 5: the pair (T2a, T2d) combined with W[8], W[9]. */
Chris@82 144 {
Chris@82 145 E T2d, T29, T2b, T2c, T2e, T2a;
Chris@82 146 T2d = T1B + T1E;
Chris@82 147 T2a = T1g + T1n;
Chris@82 148 T29 = W[8];
Chris@82 149 T2b = T29 * T2a;
Chris@82 150 T2c = W[9];
Chris@82 151 T2e = T2c * T2a;
Chris@82 152 cr[WS(rs, 5)] = FNMS(T2c, T2d, T2b);
Chris@82 153 ci[WS(rs, 5)] = FMA(T29, T2d, T2e);
Chris@82 154 }
/* Even indices 2, 6, 8, 4, each combined with its own W pair. */
Chris@82 155 {
Chris@82 156 E TQ, T16, TC, TU, TN, T15, T12, T1a, Tm, TL, T10;
Chris@82 157 TQ = FNMS(KP618033988, TP, TO);
Chris@82 158 T16 = FMA(KP618033988, TO, TP);
Chris@82 159 Tm = FNMS(KP559016994, Tl, Tk);
Chris@82 160 TC = FMA(KP951056516, TB, Tm);
Chris@82 161 TU = FNMS(KP951056516, TB, Tm);
Chris@82 162 TL = FNMS(KP250000000, TK, TH);
Chris@82 163 TN = FNMS(KP559016994, TM, TL);
Chris@82 164 T15 = FMA(KP559016994, TM, TL);
Chris@82 165 T10 = FMA(KP559016994, Tl, Tk);
Chris@82 166 T12 = FMA(KP951056516, T11, T10);
Chris@82 167 T1a = FNMS(KP951056516, T11, T10);
Chris@82 168 {
Chris@82 169 E TR, TE, TS, Tj, TD;
Chris@82 170 TR = FNMS(KP951056516, TQ, TN);
Chris@82 171 TE = W[3];
Chris@82 172 TS = TE * TC;
Chris@82 173 Tj = W[2];
Chris@82 174 TD = Tj * TC;
Chris@82 175 cr[WS(rs, 2)] = FNMS(TE, TR, TD);
Chris@82 176 ci[WS(rs, 2)] = FMA(Tj, TR, TS);
Chris@82 177 }
Chris@82 178 {
Chris@82 179 E T1d, T1c, T1e, T19, T1b;
Chris@82 180 T1d = FMA(KP951056516, T16, T15);
Chris@82 181 T1c = W[11];
Chris@82 182 T1e = T1c * T1a;
Chris@82 183 T19 = W[10];
Chris@82 184 T1b = T19 * T1a;
Chris@82 185 cr[WS(rs, 6)] = FNMS(T1c, T1d, T1b);
Chris@82 186 ci[WS(rs, 6)] = FMA(T19, T1d, T1e);
Chris@82 187 }
Chris@82 188 {
Chris@82 189 E TX, TW, TY, TT, TV;
Chris@82 190 TX = FMA(KP951056516, TQ, TN);
Chris@82 191 TW = W[15];
Chris@82 192 TY = TW * TU;
Chris@82 193 TT = W[14];
Chris@82 194 TV = TT * TU;
Chris@82 195 cr[WS(rs, 8)] = FNMS(TW, TX, TV);
Chris@82 196 ci[WS(rs, 8)] = FMA(TT, TX, TY);
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T17, T14, T18, TZ, T13;
Chris@82 200 T17 = FNMS(KP951056516, T16, T15);
Chris@82 201 T14 = W[7];
Chris@82 202 T18 = T14 * T12;
Chris@82 203 TZ = W[6];
Chris@82 204 T13 = TZ * T12;
Chris@82 205 cr[WS(rs, 4)] = FNMS(T14, T17, T13);
Chris@82 206 ci[WS(rs, 4)] = FMA(TZ, T17, T18);
Chris@82 207 }
Chris@82 208 }
/* Odd indices 1, 7, 9, 3, each combined with its own W pair. */
Chris@82 209 {
Chris@82 210 E T1K, T20, T1y, T1O, T1H, T1Z, T1W, T24, T1q, T1F, T1U;
Chris@82 211 T1K = FMA(KP618033988, T1J, T1I);
Chris@82 212 T20 = FNMS(KP618033988, T1I, T1J);
Chris@82 213 T1q = FMA(KP559016994, T1p, T1o);
Chris@82 214 T1y = FNMS(KP951056516, T1x, T1q);
Chris@82 215 T1O = FMA(KP951056516, T1x, T1q);
Chris@82 216 T1F = FNMS(KP250000000, T1E, T1B);
Chris@82 217 T1H = FMA(KP559016994, T1G, T1F);
Chris@82 218 T1Z = FNMS(KP559016994, T1G, T1F);
Chris@82 219 T1U = FNMS(KP559016994, T1p, T1o);
Chris@82 220 T1W = FNMS(KP951056516, T1V, T1U);
Chris@82 221 T24 = FMA(KP951056516, T1V, T1U);
Chris@82 222 {
Chris@82 223 E T1L, T1A, T1M, T1f, T1z;
Chris@82 224 T1L = FMA(KP951056516, T1K, T1H);
Chris@82 225 T1A = W[1];
Chris@82 226 T1M = T1A * T1y;
Chris@82 227 T1f = W[0];
Chris@82 228 T1z = T1f * T1y;
Chris@82 229 cr[WS(rs, 1)] = FNMS(T1A, T1L, T1z);
Chris@82 230 ci[WS(rs, 1)] = FMA(T1f, T1L, T1M);
Chris@82 231 }
Chris@82 232 {
Chris@82 233 E T27, T26, T28, T23, T25;
Chris@82 234 T27 = FNMS(KP951056516, T20, T1Z);
Chris@82 235 T26 = W[13];
Chris@82 236 T28 = T26 * T24;
Chris@82 237 T23 = W[12];
Chris@82 238 T25 = T23 * T24;
Chris@82 239 cr[WS(rs, 7)] = FNMS(T26, T27, T25);
Chris@82 240 ci[WS(rs, 7)] = FMA(T23, T27, T28);
Chris@82 241 }
Chris@82 242 {
Chris@82 243 E T1R, T1Q, T1S, T1N, T1P;
Chris@82 244 T1R = FNMS(KP951056516, T1K, T1H);
Chris@82 245 T1Q = W[17];
Chris@82 246 T1S = T1Q * T1O;
Chris@82 247 T1N = W[16];
Chris@82 248 T1P = T1N * T1O;
Chris@82 249 cr[WS(rs, 9)] = FNMS(T1Q, T1R, T1P);
Chris@82 250 ci[WS(rs, 9)] = FMA(T1N, T1R, T1S);
Chris@82 251 }
Chris@82 252 {
Chris@82 253 E T21, T1Y, T22, T1T, T1X;
Chris@82 254 T21 = FMA(KP951056516, T20, T1Z);
Chris@82 255 T1Y = W[5];
Chris@82 256 T22 = T1Y * T1W;
Chris@82 257 T1T = W[4];
Chris@82 258 T1X = T1T * T1W;
Chris@82 259 cr[WS(rs, 3)] = FNMS(T1Y, T21, T1X);
Chris@82 260 ci[WS(rs, 3)] = FMA(T1T, T21, T22);
Chris@82 261 }
Chris@82 262 }
Chris@82 263 }
Chris@82 264 }
Chris@82 265 }
Chris@82 266
/*
 * Twiddle instruction list referenced by the codelet descriptor below:
 * one TW_FULL entry with arguments 1 and 10, followed by a TW_NEXT entry.
 * The field meanings are defined by tw_instr in the FFTW headers, which are
 * not visible here; presumably TW_NEXT terminates the list -- TODO confirm.
 */
Chris@82 267 static const tw_instr twinstr[] = {
Chris@82 268 {TW_FULL, 1, 10},
Chris@82 269 {TW_NEXT, 1, 0}
Chris@82 270 };
Chris@82 271
/*
 * Descriptor for the FMA variant: the value 10, the name hb_10, the twiddle
 * list twinstr, a pointer to GENUS (defined outside this file), and the
 * counts {48, 18, 54, 0}. The first three counts match the 48 additions,
 * 18 multiplications and 54 fused multiply/adds quoted in the generated
 * header comment of this branch.
 */
Chris@82 272 static const hc2hc_desc desc = { 10, "hb_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@82 273
/*
 * Registration entry point: hands hb_10 and its descriptor to the planner p
 * through X(khc2hc_register). X() is a macro from the FFTW headers and is not
 * defined in this file (presumably a name-prefixing macro -- TODO confirm).
 */
Chris@82 274 void X(codelet_hb_10) (planner *p) {
Chris@82 275 X(khc2hc_register) (p, hb_10, &desc);
Chris@82 276 }
Chris@82 277 #else
Chris@82 278
Chris@82 279 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hb_10 -include rdft/scalar/hb.h */
Chris@82 280
Chris@82 281 /*
Chris@82 282 * This function contains 102 FP additions, 60 FP multiplications,
Chris@82 283 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 284 * 41 stack variables, 4 constants, and 40 memory accesses
Chris@82 285 */
Chris@82 286 #include "rdft/scalar/hb.h"
Chris@82 287
/*
 * hb_10 (non-FMA variant): in-place size-10 twiddle codelet emitted by genfft
 * (gen_hc2hc with -n 10 -dif -sign 1, per the generator command line recorded
 * just before this function). It has the same signature, reads and writes the
 * same array slots, and uses the same W pairs as the variant in the other
 * preprocessor branch.
 *
 * cr, ci: the two data arrays. Each pass reads elements 0..9 of both at
 *         stride rs (through WS(rs, k)) and overwrites the same 20 slots.
 * W:      twiddle table. 18 values are consumed per pass, as the pairs
 *         W[2k-2], W[2k-1] for output index k = 1..9; index 0 is written
 *         without any twiddle factor.
 * mb, me: half-open range [mb, me) of the pass counter m.
 * ms:     per-pass step; cr advances by +ms while ci moves by -ms.
 *
 * FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE come from the included FFTW
 * headers and are not defined in this file. Presumably FMA(a, b, c) is
 * a*b + c and FNMS(a, b, c) is c - a*b -- TODO confirm against the headers.
 */
Chris@82 288 static void hb_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 289 {
/*
 * Numeric values of the constants: KP250000000 = 1/4,
 * KP951056516 ~ sin(2*pi/5), KP587785252 ~ sin(pi/5),
 * KP559016994 ~ sqrt(5)/4.
 */
Chris@82 290 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 291 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 292 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 293 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 294 {
Chris@82 295 INT m;
/*
 * W is first advanced by (mb - 1) * 18 and then by 18 per pass, so pass m
 * uses the 18 entries starting at offset (m - 1) * 18 from the incoming W.
 */
Chris@82 296 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@82 297 E T3, T18, TE, TF, T1B, T1A, T1f, T1t, Ti, Tl, TJ, T1i, Tt, TA, T1w;
Chris@82 298 E T1v, T1p, T1E, TM, TO;
/* Load all 20 inputs and form the shared sums and differences. */
Chris@82 299 {
Chris@82 300 E T1, T2, TH, TI;
Chris@82 301 T1 = cr[0];
Chris@82 302 T2 = ci[WS(rs, 4)];
Chris@82 303 T3 = T1 + T2;
Chris@82 304 T18 = T1 - T2;
/* Inputs with indices 0..4: sum and difference of each cr/ci pair. */
Chris@82 305 {
Chris@82 306 E T6, T19, Tg, T1d, T9, T1a, Td, T1c;
Chris@82 307 {
Chris@82 308 E T4, T5, Te, Tf;
Chris@82 309 T4 = cr[WS(rs, 2)];
Chris@82 310 T5 = ci[WS(rs, 2)];
Chris@82 311 T6 = T4 + T5;
Chris@82 312 T19 = T4 - T5;
Chris@82 313 Te = ci[WS(rs, 3)];
Chris@82 314 Tf = cr[WS(rs, 1)];
Chris@82 315 Tg = Te + Tf;
Chris@82 316 T1d = Te - Tf;
Chris@82 317 }
Chris@82 318 {
Chris@82 319 E T7, T8, Tb, Tc;
Chris@82 320 T7 = ci[WS(rs, 1)];
Chris@82 321 T8 = cr[WS(rs, 3)];
Chris@82 322 T9 = T7 + T8;
Chris@82 323 T1a = T7 - T8;
Chris@82 324 Tb = cr[WS(rs, 4)];
Chris@82 325 Tc = ci[0];
Chris@82 326 Td = Tb + Tc;
Chris@82 327 T1c = Tb - Tc;
Chris@82 328 }
Chris@82 329 TE = T6 - T9;
Chris@82 330 TF = Td - Tg;
Chris@82 331 T1B = T1c - T1d;
Chris@82 332 T1A = T19 - T1a;
Chris@82 333 {
Chris@82 334 E T1b, T1e, Ta, Th;
Chris@82 335 T1b = T19 + T1a;
Chris@82 336 T1e = T1c + T1d;
Chris@82 337 T1f = T1b + T1e;
Chris@82 338 T1t = KP559016994 * (T1b - T1e);
Chris@82 339 Ta = T6 + T9;
Chris@82 340 Th = Td + Tg;
Chris@82 341 Ti = Ta + Th;
Chris@82 342 Tl = KP559016994 * (Ta - Th);
Chris@82 343 }
Chris@82 344 }
/* Difference and sum of ci[9] and cr[5]. */
Chris@82 345 TH = ci[WS(rs, 9)];
Chris@82 346 TI = cr[WS(rs, 5)];
Chris@82 347 TJ = TH - TI;
Chris@82 348 T1i = TH + TI;
/* Remaining inputs with indices 5..9: difference and sum of each ci/cr pair. */
Chris@82 349 {
Chris@82 350 E Tp, T1j, Tz, T1n, Ts, T1k, Tw, T1m;
Chris@82 351 {
Chris@82 352 E Tn, To, Tx, Ty;
Chris@82 353 Tn = ci[WS(rs, 7)];
Chris@82 354 To = cr[WS(rs, 7)];
Chris@82 355 Tp = Tn - To;
Chris@82 356 T1j = Tn + To;
Chris@82 357 Tx = ci[WS(rs, 8)];
Chris@82 358 Ty = cr[WS(rs, 6)];
Chris@82 359 Tz = Tx - Ty;
Chris@82 360 T1n = Tx + Ty;
Chris@82 361 }
Chris@82 362 {
Chris@82 363 E Tq, Tr, Tu, Tv;
Chris@82 364 Tq = ci[WS(rs, 6)];
Chris@82 365 Tr = cr[WS(rs, 8)];
Chris@82 366 Ts = Tq - Tr;
Chris@82 367 T1k = Tq + Tr;
Chris@82 368 Tu = ci[WS(rs, 5)];
Chris@82 369 Tv = cr[WS(rs, 9)];
Chris@82 370 Tw = Tu - Tv;
Chris@82 371 T1m = Tu + Tv;
Chris@82 372 }
Chris@82 373 Tt = Tp - Ts;
Chris@82 374 TA = Tw - Tz;
Chris@82 375 T1w = T1m + T1n;
Chris@82 376 T1v = T1j + T1k;
Chris@82 377 {
Chris@82 378 E T1l, T1o, TK, TL;
Chris@82 379 T1l = T1j - T1k;
Chris@82 380 T1o = T1m - T1n;
Chris@82 381 T1p = T1l + T1o;
Chris@82 382 T1E = KP559016994 * (T1l - T1o);
Chris@82 383 TK = Tp + Ts;
Chris@82 384 TL = Tw + Tz;
Chris@82 385 TM = TK + TL;
Chris@82 386 TO = KP559016994 * (TK - TL);
Chris@82 387 }
Chris@82 388 }
Chris@82 389 }
/* Index 0 is stored as plain sums; no W entry is applied to it. */
Chris@82 390 cr[0] = T3 + Ti;
Chris@82 391 ci[0] = TJ + TM;
/* Index 5: the pair (T1g, T1q) combined with W[8], W[9]. */
Chris@82 392 {
Chris@82 393 E T1g, T1q, T17, T1h;
Chris@82 394 T1g = T18 + T1f;
Chris@82 395 T1q = T1i + T1p;
Chris@82 396 T17 = W[8];
Chris@82 397 T1h = W[9];
Chris@82 398 cr[WS(rs, 5)] = FNMS(T1h, T1q, T17 * T1g);
Chris@82 399 ci[WS(rs, 5)] = FMA(T1h, T1g, T17 * T1q);
Chris@82 400 }
/* Even indices 2, 6, 8, 4, each combined with its own W pair. */
Chris@82 401 {
Chris@82 402 E TB, TG, T11, TX, TP, T10, Tm, TW, TN, Tk;
Chris@82 403 TB = FNMS(KP951056516, TA, KP587785252 * Tt);
Chris@82 404 TG = FNMS(KP951056516, TF, KP587785252 * TE);
Chris@82 405 T11 = FMA(KP951056516, TE, KP587785252 * TF);
Chris@82 406 TX = FMA(KP951056516, Tt, KP587785252 * TA);
Chris@82 407 TN = FNMS(KP250000000, TM, TJ);
Chris@82 408 TP = TN - TO;
Chris@82 409 T10 = TO + TN;
Chris@82 410 Tk = FNMS(KP250000000, Ti, T3);
Chris@82 411 Tm = Tk - Tl;
Chris@82 412 TW = Tl + Tk;
Chris@82 413 {
Chris@82 414 E TC, TQ, Tj, TD;
Chris@82 415 TC = Tm - TB;
Chris@82 416 TQ = TG + TP;
Chris@82 417 Tj = W[2];
Chris@82 418 TD = W[3];
Chris@82 419 cr[WS(rs, 2)] = FNMS(TD, TQ, Tj * TC);
Chris@82 420 ci[WS(rs, 2)] = FMA(TD, TC, Tj * TQ);
Chris@82 421 }
Chris@82 422 {
Chris@82 423 E T14, T16, T13, T15;
Chris@82 424 T14 = TW - TX;
Chris@82 425 T16 = T11 + T10;
Chris@82 426 T13 = W[10];
Chris@82 427 T15 = W[11];
Chris@82 428 cr[WS(rs, 6)] = FNMS(T15, T16, T13 * T14);
Chris@82 429 ci[WS(rs, 6)] = FMA(T15, T14, T13 * T16);
Chris@82 430 }
Chris@82 431 {
Chris@82 432 E TS, TU, TR, TT;
Chris@82 433 TS = Tm + TB;
Chris@82 434 TU = TP - TG;
Chris@82 435 TR = W[14];
Chris@82 436 TT = W[15];
Chris@82 437 cr[WS(rs, 8)] = FNMS(TT, TU, TR * TS);
Chris@82 438 ci[WS(rs, 8)] = FMA(TT, TS, TR * TU);
Chris@82 439 }
Chris@82 440 {
Chris@82 441 E TY, T12, TV, TZ;
Chris@82 442 TY = TW + TX;
Chris@82 443 T12 = T10 - T11;
Chris@82 444 TV = W[6];
Chris@82 445 TZ = W[7];
Chris@82 446 cr[WS(rs, 4)] = FNMS(TZ, T12, TV * TY);
Chris@82 447 ci[WS(rs, 4)] = FMA(TZ, TY, TV * T12);
Chris@82 448 }
Chris@82 449 }
/* Odd indices 7, 9, 3, 1, each combined with its own W pair. */
Chris@82 450 {
Chris@82 451 E T1x, T1C, T1Q, T1N, T1F, T1R, T1u, T1M, T1D, T1s;
Chris@82 452 T1x = FNMS(KP951056516, T1w, KP587785252 * T1v);
Chris@82 453 T1C = FNMS(KP951056516, T1B, KP587785252 * T1A);
Chris@82 454 T1Q = FMA(KP951056516, T1A, KP587785252 * T1B);
Chris@82 455 T1N = FMA(KP951056516, T1v, KP587785252 * T1w);
Chris@82 456 T1D = FNMS(KP250000000, T1p, T1i);
Chris@82 457 T1F = T1D - T1E;
Chris@82 458 T1R = T1E + T1D;
Chris@82 459 T1s = FNMS(KP250000000, T1f, T18);
Chris@82 460 T1u = T1s - T1t;
Chris@82 461 T1M = T1t + T1s;
Chris@82 462 {
Chris@82 463 E T1y, T1G, T1r, T1z;
Chris@82 464 T1y = T1u - T1x;
Chris@82 465 T1G = T1C + T1F;
Chris@82 466 T1r = W[12];
Chris@82 467 T1z = W[13];
Chris@82 468 cr[WS(rs, 7)] = FNMS(T1z, T1G, T1r * T1y);
Chris@82 469 ci[WS(rs, 7)] = FMA(T1r, T1G, T1z * T1y);
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E T1U, T1W, T1T, T1V;
Chris@82 473 T1U = T1M + T1N;
Chris@82 474 T1W = T1R - T1Q;
Chris@82 475 T1T = W[16];
Chris@82 476 T1V = W[17];
Chris@82 477 cr[WS(rs, 9)] = FNMS(T1V, T1W, T1T * T1U);
Chris@82 478 ci[WS(rs, 9)] = FMA(T1T, T1W, T1V * T1U);
Chris@82 479 }
Chris@82 480 {
Chris@82 481 E T1I, T1K, T1H, T1J;
Chris@82 482 T1I = T1u + T1x;
Chris@82 483 T1K = T1F - T1C;
Chris@82 484 T1H = W[4];
Chris@82 485 T1J = W[5];
Chris@82 486 cr[WS(rs, 3)] = FNMS(T1J, T1K, T1H * T1I);
Chris@82 487 ci[WS(rs, 3)] = FMA(T1H, T1K, T1J * T1I);
Chris@82 488 }
Chris@82 489 {
Chris@82 490 E T1O, T1S, T1L, T1P;
Chris@82 491 T1O = T1M - T1N;
Chris@82 492 T1S = T1Q + T1R;
Chris@82 493 T1L = W[0];
Chris@82 494 T1P = W[1];
Chris@82 495 cr[WS(rs, 1)] = FNMS(T1P, T1S, T1L * T1O);
Chris@82 496 ci[WS(rs, 1)] = FMA(T1L, T1S, T1P * T1O);
Chris@82 497 }
Chris@82 498 }
Chris@82 499 }
Chris@82 500 }
Chris@82 501 }
Chris@82 502
/*
 * Twiddle instruction list referenced by the codelet descriptor below:
 * one TW_FULL entry with arguments 1 and 10, followed by a TW_NEXT entry.
 * The field meanings are defined by tw_instr in the FFTW headers, which are
 * not visible here; presumably TW_NEXT terminates the list -- TODO confirm.
 */
Chris@82 503 static const tw_instr twinstr[] = {
Chris@82 504 {TW_FULL, 1, 10},
Chris@82 505 {TW_NEXT, 1, 0}
Chris@82 506 };
Chris@82 507
/*
 * Descriptor for the non-FMA variant: the value 10, the name hb_10, the
 * twiddle list twinstr, a pointer to GENUS (defined outside this file), and
 * the counts {72, 30, 30, 0}. The first three counts match the 72 additions,
 * 30 multiplications and 30 fused multiply/adds quoted in the generated
 * header comment of this branch.
 */
Chris@82 508 static const hc2hc_desc desc = { 10, "hb_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@82 509
/*
 * Registration entry point: hands hb_10 and its descriptor to the planner p
 * through X(khc2hc_register). X() is a macro from the FFTW headers and is not
 * defined in this file (presumably a name-prefixing macro -- TODO confirm).
 */
Chris@82 510 void X(codelet_hb_10) (planner *p) {
Chris@82 511 X(khc2hc_register) (p, hb_10, &desc);
Chris@82 512 }
Chris@82 513 #endif