annotate src/fftw-3.3.5/rdft/scalar/r2cb/hb_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:43 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -dif -name hb_9 -include hb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 96 FP additions, 88 FP multiplications,
Chris@42 32 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
Chris@42 33 * 69 stack variables, 10 constants, and 36 memory accesses
Chris@42 34 */
Chris@42 35 #include "hb.h"
Chris@42 36
/*
 * hb_9 (HAVE_FMA variant): per the generator command line above, a size-9
 * (-n 9) codelet produced by gen_hc2hc with -dif and -sign 1.
 *
 * Parameters, as used by the code below:
 *   cr, ci  arrays that are read and then overwritten in place; each loop
 *           iteration touches index 0 and WS(rs, 1..8) of both (9 loads and
 *           9 stores per array, 36 memory accesses in total).
 *   W       twiddle table; 16 values (W[0]..W[15]) are consumed per
 *           iteration, starting at W + (mb - 1) * 16.
 *   rs      stride handed to WS() to address the nine elements.
 *   mb, me  the loop runs for m in [mb, me).
 *   ms      per-iteration step: cr advances by ms while ci moves back by ms.
 *
 * Outputs at index 0 are stored without any twiddle multiply. For k = 1..8
 * the pair (cr[WS(rs, k)], ci[WS(rs, k)]) is formed from two intermediate
 * values combined with W[2*(k-1)] and W[2*(k-1)+1].
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE
 * come from project headers not shown here; FMA/FNMS/FMS are presumably
 * a*b+c, c-a*b and a*b-c respectively -- confirm against their definitions.
 * This code is machine-generated and scheduled; do not reorder by hand.
 */
Chris@42 37 static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP954188894, +0.954188894138671133499268364187245676532219158);
Chris@42 40 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@42 41 DK(KP492403876, +0.492403876506104029683371512294761506835321626);
Chris@42 42 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 43 DK(KP777861913, +0.777861913430206160028177977318626690410586096);
Chris@42 44 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@42 45 DK(KP363970234, +0.363970234266202361351047882776834043890471784);
Chris@42 46 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@42 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 49 {
Chris@42 50 INT m;
/* One iteration per m in [mb, me): cr += ms, ci -= ms, W += 16 each time. */
Chris@42 51 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
/* Loop-level temporaries: they carry the index-5 results out of the nested
 * scopes so the final two stores can be issued at the end of the body. */
Chris@42 52 E T1X, T1S, T1U, T1P, T1Y, T1T;
Chris@42 53 {
Chris@42 54 E T5, Tl, TQ, T1y, T1b, T1J, Tg, TE, TW, T13, T10, Tz, Tw, TT, T1K;
Chris@42 55 E T1B, T1L, T1E;
Chris@42 56 {
Chris@42 57 E T1, Th, T2, T3, Ti, Tj;
/* First group of inputs: cr[0], cr[3], cr[6] and ci[2], ci[5], ci[8]. */
Chris@42 58 T1 = cr[0];
Chris@42 59 Th = ci[WS(rs, 8)];
Chris@42 60 T2 = cr[WS(rs, 3)];
Chris@42 61 T3 = ci[WS(rs, 2)];
Chris@42 62 Ti = ci[WS(rs, 5)];
Chris@42 63 Tj = cr[WS(rs, 6)];
Chris@42 64 {
Chris@42 65 E T12, Tb, TZ, TY, Ta, Tq, T11, Tr, Ts, TS, Te, Tt;
Chris@42 66 {
Chris@42 67 E T6, Tm, Tn, To, T9, Tc, Td, Tp;
Chris@42 68 {
Chris@42 69 E T7, T8, T1a, T4;
Chris@42 70 T6 = cr[WS(rs, 1)];
Chris@42 71 T1a = T2 - T3;
Chris@42 72 T4 = T2 + T3;
Chris@42 73 {
Chris@42 74 E TP, Tk, TO, T19;
Chris@42 75 TP = Ti + Tj;
Chris@42 76 Tk = Ti - Tj;
Chris@42 77 T7 = cr[WS(rs, 4)];
Chris@42 78 T5 = T1 + T4;
Chris@42 79 TO = FNMS(KP500000000, T4, T1);
Chris@42 80 Tl = Th + Tk;
Chris@42 81 T19 = FNMS(KP500000000, Tk, Th);
Chris@42 82 TQ = FNMS(KP866025403, TP, TO);
Chris@42 83 T1y = FMA(KP866025403, TP, TO);
Chris@42 84 T1b = FMA(KP866025403, T1a, T19);
Chris@42 85 T1J = FNMS(KP866025403, T1a, T19);
Chris@42 86 T8 = ci[WS(rs, 1)];
Chris@42 87 }
Chris@42 88 Tm = ci[WS(rs, 7)];
Chris@42 89 Tn = ci[WS(rs, 4)];
Chris@42 90 To = cr[WS(rs, 7)];
Chris@42 91 T9 = T7 + T8;
Chris@42 92 T12 = T7 - T8;
Chris@42 93 }
Chris@42 94 Tb = cr[WS(rs, 2)];
Chris@42 95 TZ = Tn + To;
Chris@42 96 Tp = Tn - To;
Chris@42 97 TY = FNMS(KP500000000, T9, T6);
Chris@42 98 Ta = T6 + T9;
Chris@42 99 Tc = ci[WS(rs, 3)];
Chris@42 100 Td = ci[0];
Chris@42 101 Tq = Tm + Tp;
Chris@42 102 T11 = FMS(KP500000000, Tp, Tm);
Chris@42 103 Tr = ci[WS(rs, 6)];
Chris@42 104 Ts = cr[WS(rs, 5)];
Chris@42 105 TS = Td - Tc;
Chris@42 106 Te = Tc + Td;
Chris@42 107 Tt = cr[WS(rs, 8)];
Chris@42 108 }
Chris@42 109 {
Chris@42 110 E T1C, Tv, TR, T1D, T1z, T1A;
Chris@42 111 {
Chris@42 112 E TU, Tu, TV, Tf;
Chris@42 113 TU = FNMS(KP500000000, Te, Tb);
Chris@42 114 Tf = Tb + Te;
Chris@42 115 Tu = Ts + Tt;
Chris@42 116 TV = Ts - Tt;
Chris@42 117 Tg = Ta + Tf;
Chris@42 118 TE = Ta - Tf;
Chris@42 119 TW = FMA(KP866025403, TV, TU);
Chris@42 120 T1C = FNMS(KP866025403, TV, TU);
Chris@42 121 Tv = Tr - Tu;
Chris@42 122 TR = FMA(KP500000000, Tu, Tr);
Chris@42 123 }
Chris@42 124 T1z = FMA(KP866025403, T12, T11);
Chris@42 125 T13 = FNMS(KP866025403, T12, T11);
Chris@42 126 T10 = FNMS(KP866025403, TZ, TY);
Chris@42 127 T1A = FMA(KP866025403, TZ, TY);
Chris@42 128 Tz = Tv - Tq;
Chris@42 129 Tw = Tq + Tv;
Chris@42 130 T1D = FMA(KP866025403, TS, TR);
Chris@42 131 TT = FNMS(KP866025403, TS, TR);
Chris@42 132 T1K = FNMS(KP176326980, T1z, T1A);
Chris@42 133 T1B = FMA(KP176326980, T1A, T1z);
Chris@42 134 T1L = FNMS(KP363970234, T1C, T1D);
Chris@42 135 T1E = FMA(KP363970234, T1D, T1C);
Chris@42 136 }
Chris@42 137 }
Chris@42 138 }
Chris@42 139 {
Chris@42 140 E T1d, T14, T1c, TX;
/* Index-0 outputs (cr[0] here, ci[0] a few lines below) use no twiddle. */
Chris@42 141 cr[0] = T5 + Tg;
Chris@42 142 T1d = FNMS(KP839099631, T10, T13);
Chris@42 143 T14 = FMA(KP839099631, T13, T10);
Chris@42 144 T1c = FMA(KP176326980, TT, TW);
Chris@42 145 TX = FNMS(KP176326980, TW, TT);
Chris@42 146 ci[0] = Tl + Tw;
Chris@42 147 {
Chris@42 148 E TL, TK, TJ, Ty, TD;
Chris@42 149 Ty = FNMS(KP500000000, Tg, T5);
Chris@42 150 TD = FNMS(KP500000000, Tw, Tl);
Chris@42 151 {
Chris@42 152 E Tx, TC, TA, TI, TF;
/* Outputs 6 and 3: twiddled by (W[10], W[11]) and (W[4], W[5]). */
Chris@42 153 Tx = W[10];
Chris@42 154 TC = W[11];
Chris@42 155 TA = FNMS(KP866025403, Tz, Ty);
Chris@42 156 TI = FMA(KP866025403, Tz, Ty);
Chris@42 157 TF = FNMS(KP866025403, TE, TD);
Chris@42 158 TL = FMA(KP866025403, TE, TD);
Chris@42 159 {
Chris@42 160 E TH, TB, TG, TM;
Chris@42 161 TH = W[4];
Chris@42 162 TB = Tx * TA;
Chris@42 163 TK = W[5];
Chris@42 164 TG = Tx * TF;
Chris@42 165 TM = TH * TL;
Chris@42 166 TJ = TH * TI;
Chris@42 167 cr[WS(rs, 6)] = FNMS(TC, TF, TB);
Chris@42 168 ci[WS(rs, 6)] = FMA(TC, TA, TG);
Chris@42 169 ci[WS(rs, 3)] = FMA(TK, TI, TM);
Chris@42 170 }
Chris@42 171 }
Chris@42 172 cr[WS(rs, 3)] = FNMS(TK, TL, TJ);
Chris@42 173 {
Chris@42 174 E T1k, T1p, T1l, T1q, T1m;
Chris@42 175 {
Chris@42 176 E T1e, T1j, T15, T1o;
Chris@42 177 T1e = FNMS(KP777861913, T1d, T1c);
Chris@42 178 T1j = FMA(KP777861913, T1d, T1c);
Chris@42 179 T15 = FNMS(KP777861913, T14, TX);
Chris@42 180 T1o = FMA(KP777861913, T14, TX);
Chris@42 181 {
Chris@42 182 E TN, T16, T1f, T17, T1s, T1v, T18, T1i, T1n, T1r, T1u;
/* Outputs 1, 4 and 7: twiddled by W[0..1], W[6..7] and W[12..13]. */
Chris@42 183 TN = W[0];
Chris@42 184 T16 = FNMS(KP984807753, T15, TQ);
Chris@42 185 T1i = FMA(KP492403876, T15, TQ);
Chris@42 186 T1f = FMA(KP984807753, T1e, T1b);
Chris@42 187 T1n = FNMS(KP492403876, T1e, T1b);
Chris@42 188 T17 = TN * T16;
Chris@42 189 T1s = FMA(KP852868531, T1j, T1i);
Chris@42 190 T1k = FNMS(KP852868531, T1j, T1i);
Chris@42 191 T1v = FMA(KP852868531, T1o, T1n);
Chris@42 192 T1p = FNMS(KP852868531, T1o, T1n);
Chris@42 193 T18 = W[1];
Chris@42 194 T1r = W[6];
Chris@42 195 T1u = W[7];
Chris@42 196 {
Chris@42 197 E T1h, T1g, T1w, T1t;
Chris@42 198 T1h = W[12];
Chris@42 199 cr[WS(rs, 1)] = FNMS(T18, T1f, T17);
Chris@42 200 T1g = T18 * T16;
Chris@42 201 T1w = T1r * T1v;
Chris@42 202 T1t = T1r * T1s;
Chris@42 203 T1l = T1h * T1k;
Chris@42 204 ci[WS(rs, 1)] = FMA(TN, T1f, T1g);
Chris@42 205 ci[WS(rs, 4)] = FMA(T1u, T1s, T1w);
Chris@42 206 cr[WS(rs, 4)] = FNMS(T1u, T1v, T1t);
Chris@42 207 T1q = T1h * T1p;
Chris@42 208 }
Chris@42 209 T1m = W[13];
Chris@42 210 }
Chris@42 211 }
Chris@42 212 {
Chris@42 213 E T1F, T1W, T1R, T1V, T1N, T1M, T1x, T1I;
Chris@42 214 T1F = FNMS(KP954188894, T1E, T1B);
Chris@42 215 T1W = FMA(KP954188894, T1E, T1B);
Chris@42 216 T1M = FNMS(KP954188894, T1L, T1K);
Chris@42 217 T1R = FMA(KP954188894, T1L, T1K);
Chris@42 218 ci[WS(rs, 7)] = FMA(T1m, T1k, T1q);
Chris@42 219 cr[WS(rs, 7)] = FNMS(T1m, T1p, T1l);
Chris@42 220 T1V = FNMS(KP492403876, T1M, T1J);
Chris@42 221 T1N = FMA(KP984807753, T1M, T1J);
/* Outputs 2, 8 and 5: twiddled by W[2..3], W[14..15] and W[8..9]. */
Chris@42 222 T1x = W[2];
Chris@42 223 T1I = W[3];
Chris@42 224 {
Chris@42 225 E T23, T22, T20, T1Z, T24, T21;
Chris@42 226 T1X = FMA(KP852868531, T1W, T1V);
Chris@42 227 T23 = FNMS(KP852868531, T1W, T1V);
Chris@42 228 {
Chris@42 229 E T1G, T1Q, T1O, T1H;
Chris@42 230 T1G = FMA(KP984807753, T1F, T1y);
Chris@42 231 T1Q = FNMS(KP492403876, T1F, T1y);
Chris@42 232 T1O = T1x * T1N;
Chris@42 233 T22 = W[15];
Chris@42 234 T1H = T1x * T1G;
Chris@42 235 T20 = FMA(KP852868531, T1R, T1Q);
Chris@42 236 T1S = FNMS(KP852868531, T1R, T1Q);
Chris@42 237 ci[WS(rs, 2)] = FMA(T1I, T1G, T1O);
Chris@42 238 cr[WS(rs, 2)] = FNMS(T1I, T1N, T1H);
Chris@42 239 T1Z = W[14];
Chris@42 240 T24 = T22 * T20;
Chris@42 241 }
Chris@42 242 T1U = W[9];
Chris@42 243 T21 = T1Z * T20;
Chris@42 244 ci[WS(rs, 8)] = FMA(T1Z, T23, T24);
Chris@42 245 T1P = W[8];
Chris@42 246 T1Y = T1U * T1S;
Chris@42 247 cr[WS(rs, 8)] = FNMS(T22, T23, T21);
Chris@42 248 }
Chris@42 249 }
Chris@42 250 }
Chris@42 251 }
Chris@42 252 }
Chris@42 253 }
/* Index-5 outputs are completed last, from the loop-level temporaries. */
Chris@42 254 T1T = T1P * T1S;
Chris@42 255 ci[WS(rs, 5)] = FMA(T1P, T1X, T1Y);
Chris@42 256 cr[WS(rs, 5)] = FNMS(T1U, T1X, T1T);
Chris@42 257 }
Chris@42 258 }
Chris@42 259 }
Chris@42 260
/*
 * Twiddle instruction table referenced by desc below.
 * NOTE(review): presumably {TW_FULL, 1, 9} requests the full twiddle set for
 * size 9 and {TW_NEXT, 1, 0} terminates the list -- confirm against the
 * tw_instr definition, which is not part of this file.
 */
Chris@42 261 static const tw_instr twinstr[] = {
Chris@42 262 {TW_FULL, 1, 9},
Chris@42 263 {TW_NEXT, 1, 0}
Chris@42 264 };
Chris@42 265
/*
 * Codelet descriptor for the HAVE_FMA variant: size 9, name "hb_9", the
 * twiddle table above and GENUS. The trailing {24, 16, 72, 0} repeats the
 * addition / multiplication / fused multiply-add counts quoted in the
 * generated header comment of this variant.
 */
Chris@42 266 static const hc2hc_desc desc = { 9, "hb_9", twinstr, &GENUS, {24, 16, 72, 0} };
Chris@42 267
/*
 * Registration entry point (HAVE_FMA variant): hands hb_9 and its descriptor
 * to X(khc2hc_register) for the given planner p.
 */
Chris@42 268 void X(codelet_hb_9) (planner *p) {
Chris@42 269 X(khc2hc_register) (p, hb_9, &desc);
Chris@42 270 }
Chris@42 271 #else /* HAVE_FMA */
Chris@42 272
Chris@42 273 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -dif -name hb_9 -include hb.h */
Chris@42 274
Chris@42 275 /*
Chris@42 276 * This function contains 96 FP additions, 72 FP multiplications,
Chris@42 277 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
Chris@42 278 * 53 stack variables, 8 constants, and 36 memory accesses
Chris@42 279 */
Chris@42 280 #include "hb.h"
Chris@42 281
/*
 * hb_9 (variant built when HAVE_FMA is not defined): per the generator
 * command line above, a size-9 (-n 9) codelet produced by gen_hc2hc with
 * -dif and -sign 1. It has the same signature as the HAVE_FMA variant.
 *
 * Parameters, as used by the code below:
 *   cr, ci  arrays that are read and then overwritten in place; each loop
 *           iteration touches index 0 and WS(rs, 1..8) of both.
 *   W       twiddle table; 16 values (W[0]..W[15]) are consumed per
 *           iteration, starting at W + (mb - 1) * 16.
 *   rs      stride handed to WS() to address the nine elements.
 *   mb, me  the loop runs for m in [mb, me).
 *   ms      per-iteration step: cr advances by ms while ci moves back by ms.
 *
 * Outputs at index 0 are stored without any twiddle multiply. For k = 1..8
 * the pair (cr[WS(rs, k)], ci[WS(rs, k)]) is formed from two intermediate
 * values combined with W[2*(k-1)] and W[2*(k-1)+1].
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come
 * from project headers not shown here; FMA/FNMS are presumably a*b+c and
 * c-a*b -- confirm against their definitions. This code is machine-generated;
 * do not reorder by hand.
 */
Chris@42 282 static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 283 {
Chris@42 284 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 285 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@42 286 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@42 287 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@42 288 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@42 289 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@42 290 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 291 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 292 {
Chris@42 293 INT m;
/* One iteration per m in [mb, me): cr += ms, ci -= ms, W += 16 each time. */
Chris@42 294 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@42 295 E T5, Tl, TM, T1o, T16, T1y, Ta, Tf, Tg, Tq, Tv, Tw, TT, T17, T1u;
Chris@42 296 E T1A, T1r, T1z, T10, T18;
/* First input group: cr[0], cr[3], cr[6] and ci[2], ci[5], ci[8]. */
Chris@42 297 {
Chris@42 298 E T1, Th, T4, T14, Tk, TL, TK, T15;
Chris@42 299 T1 = cr[0];
Chris@42 300 Th = ci[WS(rs, 8)];
Chris@42 301 {
Chris@42 302 E T2, T3, Ti, Tj;
Chris@42 303 T2 = cr[WS(rs, 3)];
Chris@42 304 T3 = ci[WS(rs, 2)];
Chris@42 305 T4 = T2 + T3;
Chris@42 306 T14 = KP866025403 * (T2 - T3);
Chris@42 307 Ti = ci[WS(rs, 5)];
Chris@42 308 Tj = cr[WS(rs, 6)];
Chris@42 309 Tk = Ti - Tj;
Chris@42 310 TL = KP866025403 * (Ti + Tj);
Chris@42 311 }
Chris@42 312 T5 = T1 + T4;
Chris@42 313 Tl = Th + Tk;
Chris@42 314 TK = FNMS(KP500000000, T4, T1);
Chris@42 315 TM = TK - TL;
Chris@42 316 T1o = TK + TL;
Chris@42 317 T15 = FNMS(KP500000000, Tk, Th);
Chris@42 318 T16 = T14 + T15;
Chris@42 319 T1y = T15 - T14;
Chris@42 320 }
/* Remaining twelve inputs, combined and then scaled by the KP* constants. */
Chris@42 321 {
Chris@42 322 E T6, T9, TN, TQ, Tm, Tp, TO, TR, Tb, Te, TU, TX, Tr, Tu, TV;
Chris@42 323 E TY;
Chris@42 324 {
Chris@42 325 E T7, T8, Tn, To;
Chris@42 326 T6 = cr[WS(rs, 1)];
Chris@42 327 T7 = cr[WS(rs, 4)];
Chris@42 328 T8 = ci[WS(rs, 1)];
Chris@42 329 T9 = T7 + T8;
Chris@42 330 TN = FNMS(KP500000000, T9, T6);
Chris@42 331 TQ = KP866025403 * (T7 - T8);
Chris@42 332 Tm = ci[WS(rs, 7)];
Chris@42 333 Tn = ci[WS(rs, 4)];
Chris@42 334 To = cr[WS(rs, 7)];
Chris@42 335 Tp = Tn - To;
Chris@42 336 TO = KP866025403 * (Tn + To);
Chris@42 337 TR = FNMS(KP500000000, Tp, Tm);
Chris@42 338 }
Chris@42 339 {
Chris@42 340 E Tc, Td, Ts, Tt;
Chris@42 341 Tb = cr[WS(rs, 2)];
Chris@42 342 Tc = ci[WS(rs, 3)];
Chris@42 343 Td = ci[0];
Chris@42 344 Te = Tc + Td;
Chris@42 345 TU = FNMS(KP500000000, Te, Tb);
Chris@42 346 TX = KP866025403 * (Tc - Td);
Chris@42 347 Tr = ci[WS(rs, 6)];
Chris@42 348 Ts = cr[WS(rs, 5)];
Chris@42 349 Tt = cr[WS(rs, 8)];
Chris@42 350 Tu = Ts + Tt;
Chris@42 351 TV = KP866025403 * (Ts - Tt);
Chris@42 352 TY = FMA(KP500000000, Tu, Tr);
Chris@42 353 }
Chris@42 354 {
Chris@42 355 E TP, TS, T1s, T1t;
Chris@42 356 Ta = T6 + T9;
Chris@42 357 Tf = Tb + Te;
Chris@42 358 Tg = Ta + Tf;
Chris@42 359 Tq = Tm + Tp;
Chris@42 360 Tv = Tr - Tu;
Chris@42 361 Tw = Tq + Tv;
Chris@42 362 TP = TN - TO;
Chris@42 363 TS = TQ + TR;
Chris@42 364 TT = FNMS(KP642787609, TS, KP766044443 * TP);
Chris@42 365 T17 = FMA(KP766044443, TS, KP642787609 * TP);
Chris@42 366 T1s = TU - TV;
Chris@42 367 T1t = TY - TX;
Chris@42 368 T1u = FMA(KP939692620, T1s, KP342020143 * T1t);
Chris@42 369 T1A = FNMS(KP939692620, T1t, KP342020143 * T1s);
Chris@42 370 {
Chris@42 371 E T1p, T1q, TW, TZ;
Chris@42 372 T1p = TN + TO;
Chris@42 373 T1q = TR - TQ;
Chris@42 374 T1r = FNMS(KP984807753, T1q, KP173648177 * T1p);
Chris@42 375 T1z = FMA(KP173648177, T1q, KP984807753 * T1p);
Chris@42 376 TW = TU + TV;
Chris@42 377 TZ = TX + TY;
Chris@42 378 T10 = FNMS(KP984807753, TZ, KP173648177 * TW);
Chris@42 379 T18 = FMA(KP984807753, TW, KP173648177 * TZ);
Chris@42 380 }
Chris@42 381 }
Chris@42 382 }
/* Index-0 outputs are stored without a twiddle multiply. */
Chris@42 383 cr[0] = T5 + Tg;
Chris@42 384 ci[0] = Tl + Tw;
/* Outputs 6 and 3: twiddled by (W[10], W[11]) and (W[4], W[5]). */
Chris@42 385 {
Chris@42 386 E TA, TG, TE, TI;
Chris@42 387 {
Chris@42 388 E Ty, Tz, TC, TD;
Chris@42 389 Ty = FNMS(KP500000000, Tg, T5);
Chris@42 390 Tz = KP866025403 * (Tv - Tq);
Chris@42 391 TA = Ty - Tz;
Chris@42 392 TG = Ty + Tz;
Chris@42 393 TC = FNMS(KP500000000, Tw, Tl);
Chris@42 394 TD = KP866025403 * (Ta - Tf);
Chris@42 395 TE = TC - TD;
Chris@42 396 TI = TD + TC;
Chris@42 397 }
Chris@42 398 {
Chris@42 399 E Tx, TB, TF, TH;
Chris@42 400 Tx = W[10];
Chris@42 401 TB = W[11];
Chris@42 402 cr[WS(rs, 6)] = FNMS(TB, TE, Tx * TA);
Chris@42 403 ci[WS(rs, 6)] = FMA(Tx, TE, TB * TA);
Chris@42 404 TF = W[4];
Chris@42 405 TH = W[5];
Chris@42 406 cr[WS(rs, 3)] = FNMS(TH, TI, TF * TG);
Chris@42 407 ci[WS(rs, 3)] = FMA(TF, TI, TH * TG);
Chris@42 408 }
Chris@42 409 }
/* Outputs 1, 4 and 7: twiddled by W[0..1], W[6..7] and W[12..13]. */
Chris@42 410 {
Chris@42 411 E T1d, T1h, T12, T1c, T1a, T1g, T11, T19, TJ, T13;
Chris@42 412 T1d = KP866025403 * (T18 - T17);
Chris@42 413 T1h = KP866025403 * (TT - T10);
Chris@42 414 T11 = TT + T10;
Chris@42 415 T12 = TM + T11;
Chris@42 416 T1c = FNMS(KP500000000, T11, TM);
Chris@42 417 T19 = T17 + T18;
Chris@42 418 T1a = T16 + T19;
Chris@42 419 T1g = FNMS(KP500000000, T19, T16);
Chris@42 420 TJ = W[0];
Chris@42 421 T13 = W[1];
Chris@42 422 cr[WS(rs, 1)] = FNMS(T13, T1a, TJ * T12);
Chris@42 423 ci[WS(rs, 1)] = FMA(T13, T12, TJ * T1a);
Chris@42 424 {
Chris@42 425 E T1k, T1m, T1j, T1l;
Chris@42 426 T1k = T1c + T1d;
Chris@42 427 T1m = T1h + T1g;
Chris@42 428 T1j = W[6];
Chris@42 429 T1l = W[7];
Chris@42 430 cr[WS(rs, 4)] = FNMS(T1l, T1m, T1j * T1k);
Chris@42 431 ci[WS(rs, 4)] = FMA(T1j, T1m, T1l * T1k);
Chris@42 432 }
Chris@42 433 {
Chris@42 434 E T1e, T1i, T1b, T1f;
Chris@42 435 T1e = T1c - T1d;
Chris@42 436 T1i = T1g - T1h;
Chris@42 437 T1b = W[12];
Chris@42 438 T1f = W[13];
Chris@42 439 cr[WS(rs, 7)] = FNMS(T1f, T1i, T1b * T1e);
Chris@42 440 ci[WS(rs, 7)] = FMA(T1b, T1i, T1f * T1e);
Chris@42 441 }
Chris@42 442 }
/* Outputs 2, 5 and 8: twiddled by W[2..3], W[8..9] and W[14..15]. */
Chris@42 443 {
Chris@42 444 E T1F, T1J, T1w, T1E, T1C, T1I, T1v, T1B, T1n, T1x;
Chris@42 445 T1F = KP866025403 * (T1A - T1z);
Chris@42 446 T1J = KP866025403 * (T1r + T1u);
Chris@42 447 T1v = T1r - T1u;
Chris@42 448 T1w = T1o + T1v;
Chris@42 449 T1E = FNMS(KP500000000, T1v, T1o);
Chris@42 450 T1B = T1z + T1A;
Chris@42 451 T1C = T1y + T1B;
Chris@42 452 T1I = FNMS(KP500000000, T1B, T1y);
Chris@42 453 T1n = W[2];
Chris@42 454 T1x = W[3];
Chris@42 455 cr[WS(rs, 2)] = FNMS(T1x, T1C, T1n * T1w);
Chris@42 456 ci[WS(rs, 2)] = FMA(T1n, T1C, T1x * T1w);
Chris@42 457 {
Chris@42 458 E T1M, T1O, T1L, T1N;
Chris@42 459 T1M = T1F + T1E;
Chris@42 460 T1O = T1I + T1J;
Chris@42 461 T1L = W[8];
Chris@42 462 T1N = W[9];
Chris@42 463 cr[WS(rs, 5)] = FNMS(T1N, T1O, T1L * T1M);
Chris@42 464 ci[WS(rs, 5)] = FMA(T1N, T1M, T1L * T1O);
Chris@42 465 }
Chris@42 466 {
Chris@42 467 E T1G, T1K, T1D, T1H;
Chris@42 468 T1G = T1E - T1F;
Chris@42 469 T1K = T1I - T1J;
Chris@42 470 T1D = W[14];
Chris@42 471 T1H = W[15];
Chris@42 472 cr[WS(rs, 8)] = FNMS(T1H, T1K, T1D * T1G);
Chris@42 473 ci[WS(rs, 8)] = FMA(T1H, T1G, T1D * T1K);
Chris@42 474 }
Chris@42 475 }
Chris@42 476 }
Chris@42 477 }
Chris@42 478 }
Chris@42 479
/*
 * Twiddle instruction table referenced by desc below (identical contents to
 * the table in the HAVE_FMA branch).
 * NOTE(review): presumably {TW_FULL, 1, 9} requests the full twiddle set for
 * size 9 and {TW_NEXT, 1, 0} terminates the list -- confirm against the
 * tw_instr definition, which is not part of this file.
 */
Chris@42 480 static const tw_instr twinstr[] = {
Chris@42 481 {TW_FULL, 1, 9},
Chris@42 482 {TW_NEXT, 1, 0}
Chris@42 483 };
Chris@42 484
/*
 * Codelet descriptor for the non-FMA variant: size 9, name "hb_9", the
 * twiddle table above and GENUS. The trailing {60, 36, 36, 0} repeats the
 * addition / multiplication / fused multiply-add counts quoted in the
 * generated header comment of this variant.
 */
Chris@42 485 static const hc2hc_desc desc = { 9, "hb_9", twinstr, &GENUS, {60, 36, 36, 0} };
Chris@42 486
/*
 * Registration entry point (non-FMA variant): hands hb_9 and its descriptor
 * to X(khc2hc_register) for the given planner p.
 */
Chris@42 487 void X(codelet_hb_9) (planner *p) {
Chris@42 488 X(khc2hc_register) (p, hb_9, &desc);
Chris@42 489 }
Chris@42 490 #endif /* HAVE_FMA */