annotate src/fftw-3.3.5/rdft/scalar/r2cf/hf_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:18 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -dit -name hf_9 -include hf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 96 FP additions, 88 FP multiplications,
Chris@42 32 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
Chris@42 33 * 69 stack variables, 10 constants, and 36 memory accesses
Chris@42 34 */
Chris@42 35 #include "hf.h"
Chris@42 36
/*
 * hf_9 (HAVE_FMA variant): machine-generated size-9 codelet. Per the
 * generator command line recorded above, it was produced by gen_hc2hc
 * with "-fma ... -n 9 -dit".
 *
 * For each m in [mb, me) it reads cr[0], ci[0] and cr[WS(rs, k)],
 * ci[WS(rs, k)] for k = 1..8, combines input k (k = 1..8) with the pair
 * W[2k-2], W[2k-1], and then overwrites those same 18 locations.
 * Between iterations cr advances by ms, ci moves back by ms and W advances
 * by 16 entries; W is first offset by (mb - 1) * 16.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE are declared outside this file (presumably via
 * codelet-rdft.h / hf.h). The comments below assume FMA(a,b,c) = a*b + c,
 * FNMS(a,b,c) = c - a*b and FMS(a,b,c) = a*b - c -- confirm in the headers.
 */
Chris@42 37 static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Named numeric constants used by the arithmetic below; KP500000000 is 1/2
 * and KP866025403 is numerically sqrt(3)/2. */
Chris@42 39 DK(KP777861913, +0.777861913430206160028177977318626690410586096);
Chris@42 40 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@42 41 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@42 42 DK(KP492403876, +0.492403876506104029683371512294761506835321626);
Chris@42 43 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 44 DK(KP954188894, +0.954188894138671133499268364187245676532219158);
Chris@42 45 DK(KP363970234, +0.363970234266202361351047882776834043890471784);
Chris@42 46 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@42 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 49 {
Chris@42 50 INT m;
/* One pass per m. All pointer and twiddle advancement happens in the
 * for-increment; MAKE_VOLATILE_STRIDE is an opaque project macro. */
Chris@42 51 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
/* T20 and T1Z are declared at loop scope because they are consumed by the
 * last two stores, after the inner blocks have closed. */
Chris@42 52 E T20, T1Z;
Chris@42 53 {
Chris@42 54 E T1, T1P, T1Q, T10, T1S, Te, TB, T1d, T1a, T19, T1M, TE, T1c, Tz, T1n;
Chris@42 55 E TC, TH, TK, T1k, TR, TG, TJ, TD;
/* Input 0 is loaded as-is; no W entry is applied to it. */
Chris@42 56 T1 = cr[0];
Chris@42 57 T1P = ci[0];
Chris@42 58 {
Chris@42 59 E T9, Tc, TY, Ta, Tb, TX, T7;
Chris@42 60 {
Chris@42 61 E T3, T6, T8, TW, T4, T2, T5;
/* Inputs 3 and 6, combined with W[4], W[5] and W[10], W[11]. Under the
 * assumed macro semantics: T7 = W[4]*cr3 + W[5]*ci3, TX = W[4]*ci3 - W[5]*cr3. */
Chris@42 62 T3 = cr[WS(rs, 3)];
Chris@42 63 T6 = ci[WS(rs, 3)];
Chris@42 64 T2 = W[4];
Chris@42 65 T9 = cr[WS(rs, 6)];
Chris@42 66 Tc = ci[WS(rs, 6)];
Chris@42 67 T8 = W[10];
Chris@42 68 TW = T2 * T6;
Chris@42 69 T4 = T2 * T3;
Chris@42 70 T5 = W[5];
Chris@42 71 TY = T8 * Tc;
Chris@42 72 Ta = T8 * T9;
Chris@42 73 Tb = W[11];
Chris@42 74 TX = FNMS(T5, T3, TW);
Chris@42 75 T7 = FMA(T5, T6, T4);
Chris@42 76 }
Chris@42 77 {
Chris@42 78 E Th, Tk, Ti, T12, Tn, Tq, Tp, T17, Tx, T14, To, Tj, TZ, Td, Tg;
Chris@42 79 E TA, Tl, Ty;
/* Finish input 6 (TZ, Td), form sums/differences of inputs 3 and 6, and
 * start on inputs 1, 7 and 4 (W[0..1], W[12..13], W[6..7]). */
Chris@42 80 Th = cr[WS(rs, 1)];
Chris@42 81 TZ = FNMS(Tb, T9, TY);
Chris@42 82 Td = FMA(Tb, Tc, Ta);
Chris@42 83 Tk = ci[WS(rs, 1)];
Chris@42 84 Tg = W[0];
Chris@42 85 T1Q = TX + TZ;
Chris@42 86 T10 = TX - TZ;
Chris@42 87 T1S = Td - T7;
Chris@42 88 Te = T7 + Td;
Chris@42 89 Ti = Tg * Th;
Chris@42 90 T12 = Tg * Tk;
Chris@42 91 {
Chris@42 92 E Tt, Tw, Ts, Tv, T16, Tu, Tm;
Chris@42 93 Tt = cr[WS(rs, 7)];
Chris@42 94 Tw = ci[WS(rs, 7)];
Chris@42 95 Ts = W[12];
Chris@42 96 Tv = W[13];
Chris@42 97 Tn = cr[WS(rs, 4)];
Chris@42 98 Tq = ci[WS(rs, 4)];
Chris@42 99 T16 = Ts * Tw;
Chris@42 100 Tu = Ts * Tt;
Chris@42 101 Tm = W[6];
Chris@42 102 Tp = W[7];
Chris@42 103 T17 = FNMS(Tv, Tt, T16);
Chris@42 104 Tx = FMA(Tv, Tw, Tu);
Chris@42 105 T14 = Tm * Tq;
Chris@42 106 To = Tm * Tn;
Chris@42 107 }
Chris@42 108 Tj = W[1];
Chris@42 109 TB = cr[WS(rs, 2)];
Chris@42 110 {
Chris@42 111 E T15, Tr, T13, T18;
/* Combine inputs 1, 4 and 7 into sums and differences. */
Chris@42 112 T15 = FNMS(Tp, Tn, T14);
Chris@42 113 Tr = FMA(Tp, Tq, To);
Chris@42 114 T13 = FNMS(Tj, Th, T12);
Chris@42 115 Tl = FMA(Tj, Tk, Ti);
Chris@42 116 T18 = T15 + T17;
Chris@42 117 T1d = T15 - T17;
Chris@42 118 Ty = Tr + Tx;
Chris@42 119 T1a = Tr - Tx;
Chris@42 120 T19 = FNMS(KP500000000, T18, T13);
Chris@42 121 T1M = T13 + T18;
Chris@42 122 TE = ci[WS(rs, 2)];
Chris@42 123 }
Chris@42 124 T1c = FNMS(KP500000000, Ty, Tl);
Chris@42 125 Tz = Tl + Ty;
Chris@42 126 TA = W[2];
Chris@42 127 {
Chris@42 128 E TN, TQ, TP, T1j, TO, TM;
/* Inputs 2, 8 and 5 with W[2..3], W[14..15] and W[8..9]; the products for
 * inputs 2 and 5 are completed in the next outer block. */
Chris@42 129 TN = cr[WS(rs, 8)];
Chris@42 130 TQ = ci[WS(rs, 8)];
Chris@42 131 TM = W[14];
Chris@42 132 T1n = TA * TE;
Chris@42 133 TC = TA * TB;
Chris@42 134 TP = W[15];
Chris@42 135 T1j = TM * TQ;
Chris@42 136 TO = TM * TN;
Chris@42 137 TH = cr[WS(rs, 5)];
Chris@42 138 TK = ci[WS(rs, 5)];
Chris@42 139 T1k = FNMS(TP, TN, T1j);
Chris@42 140 TR = FMA(TP, TQ, TO);
Chris@42 141 TG = W[8];
Chris@42 142 TJ = W[9];
Chris@42 143 }
Chris@42 144 TD = W[3];
Chris@42 145 }
Chris@42 146 }
Chris@42 147 {
Chris@42 148 E TV, Tf, T21, T1R, T1l, T1r, T1q, T1N, TT, T1g;
Chris@42 149 {
Chris@42 150 E T1o, TF, T1i, TL, T1h, TI, TS, T1p;
/* Finish inputs 2 and 5, then combine inputs 2, 5 and 8 and inputs 0, 3, 6. */
Chris@42 151 TV = FNMS(KP500000000, Te, T1);
Chris@42 152 Tf = T1 + Te;
Chris@42 153 T1h = TG * TK;
Chris@42 154 TI = TG * TH;
Chris@42 155 T1o = FNMS(TD, TB, T1n);
Chris@42 156 TF = FMA(TD, TE, TC);
Chris@42 157 T1i = FNMS(TJ, TH, T1h);
Chris@42 158 TL = FMA(TJ, TK, TI);
Chris@42 159 T21 = T1Q + T1P;
Chris@42 160 T1R = FNMS(KP500000000, T1Q, T1P);
Chris@42 161 T1p = T1i + T1k;
Chris@42 162 T1l = T1i - T1k;
Chris@42 163 TS = TL + TR;
Chris@42 164 T1r = TR - TL;
Chris@42 165 T1q = FNMS(KP500000000, T1p, T1o);
Chris@42 166 T1N = T1o + T1p;
Chris@42 167 TT = TF + TS;
Chris@42 168 T1g = FNMS(KP500000000, TS, TF);
Chris@42 169 }
Chris@42 170 {
Chris@42 171 E T11, T1z, T1E, T1D, T1X, T1T, T1I, T1C, T1Y, T1y, T1u, T24, TU;
Chris@42 172 T24 = TT - Tz;
Chris@42 173 TU = Tz + TT;
Chris@42 174 {
Chris@42 175 E T22, T1O, T1L, T23;
/* This block stores cr[0], ci[8], cr[3], ci[2], ci[5] and cr[6]. */
Chris@42 176 T22 = T1M + T1N;
Chris@42 177 T1O = T1M - T1N;
Chris@42 178 T11 = FNMS(KP866025403, T10, TV);
Chris@42 179 T1z = FMA(KP866025403, T10, TV);
Chris@42 180 T1L = FNMS(KP500000000, TU, Tf);
Chris@42 181 cr[0] = Tf + TU;
Chris@42 182 T23 = FNMS(KP500000000, T22, T21);
Chris@42 183 ci[WS(rs, 8)] = T22 + T21;
Chris@42 184 cr[WS(rs, 3)] = FMA(KP866025403, T1O, T1L);
Chris@42 185 ci[WS(rs, 2)] = FNMS(KP866025403, T1O, T1L);
Chris@42 186 ci[WS(rs, 5)] = FMA(KP866025403, T24, T23);
Chris@42 187 cr[WS(rs, 6)] = FMS(KP866025403, T24, T23);
Chris@42 188 }
Chris@42 189 {
Chris@42 190 E T1B, T1m, T1w, T1f, T1s, T1A, T1b, T1e, T1x, T1t;
/* Intermediate combinations only; no stores in this block. T20 is assigned
 * here for use after the enclosing blocks close. */
Chris@42 191 T1E = FNMS(KP866025403, T1a, T19);
Chris@42 192 T1b = FMA(KP866025403, T1a, T19);
Chris@42 193 T1e = FNMS(KP866025403, T1d, T1c);
Chris@42 194 T1D = FMA(KP866025403, T1d, T1c);
Chris@42 195 T1B = FMA(KP866025403, T1l, T1g);
Chris@42 196 T1m = FNMS(KP866025403, T1l, T1g);
Chris@42 197 T1X = FNMS(KP866025403, T1S, T1R);
Chris@42 198 T1T = FMA(KP866025403, T1S, T1R);
Chris@42 199 T1w = FNMS(KP176326980, T1b, T1e);
Chris@42 200 T1f = FMA(KP176326980, T1e, T1b);
Chris@42 201 T1s = FNMS(KP866025403, T1r, T1q);
Chris@42 202 T1A = FMA(KP866025403, T1r, T1q);
Chris@42 203 T1x = FMA(KP363970234, T1m, T1s);
Chris@42 204 T1t = FNMS(KP363970234, T1s, T1m);
Chris@42 205 T1I = FNMS(KP176326980, T1A, T1B);
Chris@42 206 T1C = FMA(KP176326980, T1B, T1A);
Chris@42 207 T1Y = FMA(KP954188894, T1x, T1w);
Chris@42 208 T1y = FNMS(KP954188894, T1x, T1w);
Chris@42 209 T20 = FMA(KP954188894, T1t, T1f);
Chris@42 210 T1u = FNMS(KP954188894, T1t, T1f);
Chris@42 211 }
Chris@42 212 {
Chris@42 213 E T1F, T1J, T1v, T1U, T1K;
/* Stores ci[6], cr[2], ci[3], ci[0], then (inner block) ci[7], cr[1],
 * ci[4], cr[7], cr[4] and ci[1]. T1Z is assigned for the final stores. */
Chris@42 214 ci[WS(rs, 6)] = FNMS(KP984807753, T1Y, T1X);
Chris@42 215 T1v = FNMS(KP492403876, T1u, T11);
Chris@42 216 cr[WS(rs, 2)] = FMA(KP984807753, T1u, T11);
Chris@42 217 T1F = FMA(KP839099631, T1E, T1D);
Chris@42 218 T1J = FNMS(KP839099631, T1D, T1E);
Chris@42 219 ci[WS(rs, 3)] = FNMS(KP852868531, T1y, T1v);
Chris@42 220 ci[0] = FMA(KP852868531, T1y, T1v);
Chris@42 221 T1U = FNMS(KP777861913, T1J, T1I);
Chris@42 222 T1K = FMA(KP777861913, T1J, T1I);
Chris@42 223 {
Chris@42 224 E T1G, T1W, T1V, T1H;
Chris@42 225 T1G = FMA(KP777861913, T1F, T1C);
Chris@42 226 T1W = FNMS(KP777861913, T1F, T1C);
Chris@42 227 T1Z = FMA(KP492403876, T1Y, T1X);
Chris@42 228 T1V = FMA(KP492403876, T1U, T1T);
Chris@42 229 ci[WS(rs, 7)] = FNMS(KP984807753, T1U, T1T);
Chris@42 230 T1H = FNMS(KP492403876, T1G, T1z);
Chris@42 231 cr[WS(rs, 1)] = FMA(KP984807753, T1G, T1z);
Chris@42 232 ci[WS(rs, 4)] = FMA(KP852868531, T1W, T1V);
Chris@42 233 cr[WS(rs, 7)] = FMS(KP852868531, T1W, T1V);
Chris@42 234 cr[WS(rs, 4)] = FMA(KP852868531, T1K, T1H);
Chris@42 235 ci[WS(rs, 1)] = FNMS(KP852868531, T1K, T1H);
Chris@42 236 }
Chris@42 237 }
Chris@42 238 }
Chris@42 239 }
Chris@42 240 }
/* Final two stores of the iteration, built from T20 and T1Z. */
Chris@42 241 cr[WS(rs, 8)] = -(FMA(KP852868531, T20, T1Z));
Chris@42 242 cr[WS(rs, 5)] = FMS(KP852868531, T20, T1Z);
Chris@42 243 }
Chris@42 244 }
Chris@42 245 }
Chris@42 246
/* Twiddle instruction table referenced by desc in this HAVE_FMA branch.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined outside this file;
 * presumably the first entry requests the full twiddle set for size 9 and
 * the TW_NEXT entry ends the list -- confirm against the twiddle headers. */
Chris@42 247 static const tw_instr twinstr[] = {
Chris@42 248 {TW_FULL, 1, 9},
Chris@42 249 {TW_NEXT, 1, 0}
Chris@42 250 };
Chris@42 251
/* Descriptor handed to the registration call below: size 9, the name string
 * "hf_9", the twinstr table and &GENUS. The counts {24, 16, 72, ...} match the
 * additions / multiplications / fused multiply-adds quoted in the generator's
 * summary comment for this variant; the meaning of the trailing 0 is not
 * visible in this file. */
Chris@42 252 static const hc2hc_desc desc = { 9, "hf_9", twinstr, &GENUS, {24, 16, 72, 0} };
Chris@42 253
/* Registration entry point (HAVE_FMA branch): passes the planner p, the hf_9
 * kernel and its descriptor to khc2hc_register.
 * NOTE(review): X(...) is a project macro, presumably a symbol-name
 * prefixing wrapper -- confirm. */
Chris@42 254 void X(codelet_hf_9) (planner *p) {
Chris@42 255 X(khc2hc_register) (p, hf_9, &desc);
Chris@42 256 }
Chris@42 257 #else /* HAVE_FMA */
Chris@42 258
Chris@42 259 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 9 -dit -name hf_9 -include hf.h */
Chris@42 260
Chris@42 261 /*
Chris@42 262 * This function contains 96 FP additions, 72 FP multiplications,
Chris@42 263 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
Chris@42 264 * 41 stack variables, 8 constants, and 36 memory accesses
Chris@42 265 */
Chris@42 266 #include "hf.h"
Chris@42 267
/*
 * hf_9 (variant compiled when HAVE_FMA is not defined): machine-generated
 * size-9 codelet. Per the generator command line recorded above, it was
 * produced by gen_hc2hc with "-n 9 -dit".
 *
 * For each m in [mb, me) it reads cr[0], ci[0] and cr[WS(rs, k)],
 * ci[WS(rs, k)] for k = 1..8, combines input k (k = 1..8) with the pair
 * W[2k-2], W[2k-1], and then overwrites those same 18 locations.
 * Between iterations cr advances by ms, ci moves back by ms and W advances
 * by 16 entries; W is first offset by (mb - 1) * 16.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE are declared outside this file (presumably via
 * codelet-rdft.h / hf.h). The comments below assume FMA(a,b,c) = a*b + c,
 * FNMS(a,b,c) = c - a*b and FMS(a,b,c) = a*b - c -- confirm in the headers.
 */
Chris@42 268 static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 269 {
/* Named numeric constants used by the arithmetic below; KP500000000 is 1/2
 * and KP866025403 is numerically sqrt(3)/2. */
Chris@42 270 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@42 271 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@42 272 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@42 273 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@42 274 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 275 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@42 276 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 277 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 278 {
Chris@42 279 INT m;
/* One pass per m. All pointer and twiddle advancement happens in the
 * for-increment; MAKE_VOLATILE_STRIDE is an opaque project macro. */
Chris@42 280 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@42 281 E T1, T1B, TQ, T1A, Tc, TN, T1C, T1D, TL, T1x, T19, T1o, T1c, T1n, Tu;
Chris@42 282 E T1w, TW, T1k, T11, T1l;
/* Stage 1: input 0 (used as-is) together with inputs 3 and 6. */
Chris@42 283 {
Chris@42 284 E T6, TO, Tb, TP;
Chris@42 285 T1 = cr[0];
Chris@42 286 T1B = ci[0];
Chris@42 287 {
Chris@42 288 E T3, T5, T2, T4;
/* Input 3 with W[4], W[5]: under the assumed macro semantics,
 * T6 = W[4]*cr + W[5]*ci and TO = W[4]*ci - W[5]*cr. The same pattern is
 * repeated for every other input below. */
Chris@42 289 T3 = cr[WS(rs, 3)];
Chris@42 290 T5 = ci[WS(rs, 3)];
Chris@42 291 T2 = W[4];
Chris@42 292 T4 = W[5];
Chris@42 293 T6 = FMA(T2, T3, T4 * T5);
Chris@42 294 TO = FNMS(T4, T3, T2 * T5);
Chris@42 295 }
Chris@42 296 {
Chris@42 297 E T8, Ta, T7, T9;
/* Input 6 with W[10], W[11]. */
Chris@42 298 T8 = cr[WS(rs, 6)];
Chris@42 299 Ta = ci[WS(rs, 6)];
Chris@42 300 T7 = W[10];
Chris@42 301 T9 = W[11];
Chris@42 302 Tb = FMA(T7, T8, T9 * Ta);
Chris@42 303 TP = FNMS(T9, T8, T7 * Ta);
Chris@42 304 }
/* Sums and scaled differences of inputs 3 and 6, combined with input 0. */
Chris@42 305 TQ = KP866025403 * (TO - TP);
Chris@42 306 T1A = KP866025403 * (Tb - T6);
Chris@42 307 Tc = T6 + Tb;
Chris@42 308 TN = FNMS(KP500000000, Tc, T1);
Chris@42 309 T1C = TO + TP;
Chris@42 310 T1D = FNMS(KP500000000, T1C, T1B);
Chris@42 311 }
/* Stage 2: inputs 2, 5 and 8. */
Chris@42 312 {
Chris@42 313 E Tz, T13, TE, T14, TJ, T15, TK, T16;
Chris@42 314 {
Chris@42 315 E Tw, Ty, Tv, Tx;
/* Input 2 with W[2], W[3]. */
Chris@42 316 Tw = cr[WS(rs, 2)];
Chris@42 317 Ty = ci[WS(rs, 2)];
Chris@42 318 Tv = W[2];
Chris@42 319 Tx = W[3];
Chris@42 320 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@42 321 T13 = FNMS(Tx, Tw, Tv * Ty);
Chris@42 322 }
Chris@42 323 {
Chris@42 324 E TB, TD, TA, TC;
/* Input 5 with W[8], W[9]. */
Chris@42 325 TB = cr[WS(rs, 5)];
Chris@42 326 TD = ci[WS(rs, 5)];
Chris@42 327 TA = W[8];
Chris@42 328 TC = W[9];
Chris@42 329 TE = FMA(TA, TB, TC * TD);
Chris@42 330 T14 = FNMS(TC, TB, TA * TD);
Chris@42 331 }
Chris@42 332 {
Chris@42 333 E TG, TI, TF, TH;
/* Input 8 with W[14], W[15]. */
Chris@42 334 TG = cr[WS(rs, 8)];
Chris@42 335 TI = ci[WS(rs, 8)];
Chris@42 336 TF = W[14];
Chris@42 337 TH = W[15];
Chris@42 338 TJ = FMA(TF, TG, TH * TI);
Chris@42 339 T15 = FNMS(TH, TG, TF * TI);
Chris@42 340 }
/* Combine the three inputs of this group; TL and T1x are the plain sums. */
Chris@42 341 TK = TE + TJ;
Chris@42 342 T16 = T14 + T15;
Chris@42 343 TL = Tz + TK;
Chris@42 344 T1x = T13 + T16;
Chris@42 345 {
Chris@42 346 E T17, T18, T1a, T1b;
Chris@42 347 T17 = FNMS(KP500000000, T16, T13);
Chris@42 348 T18 = KP866025403 * (TJ - TE);
Chris@42 349 T19 = T17 - T18;
Chris@42 350 T1o = T18 + T17;
Chris@42 351 T1a = FNMS(KP500000000, TK, Tz);
Chris@42 352 T1b = KP866025403 * (T14 - T15);
Chris@42 353 T1c = T1a - T1b;
Chris@42 354 T1n = T1a + T1b;
Chris@42 355 }
Chris@42 356 }
/* Stage 3: inputs 1, 4 and 7. */
Chris@42 357 {
Chris@42 358 E Ti, TX, Tn, TT, Ts, TU, Tt, TY;
Chris@42 359 {
Chris@42 360 E Tf, Th, Te, Tg;
/* Input 1 with W[0], W[1]. */
Chris@42 361 Tf = cr[WS(rs, 1)];
Chris@42 362 Th = ci[WS(rs, 1)];
Chris@42 363 Te = W[0];
Chris@42 364 Tg = W[1];
Chris@42 365 Ti = FMA(Te, Tf, Tg * Th);
Chris@42 366 TX = FNMS(Tg, Tf, Te * Th);
Chris@42 367 }
Chris@42 368 {
Chris@42 369 E Tk, Tm, Tj, Tl;
/* Input 4 with W[6], W[7]. */
Chris@42 370 Tk = cr[WS(rs, 4)];
Chris@42 371 Tm = ci[WS(rs, 4)];
Chris@42 372 Tj = W[6];
Chris@42 373 Tl = W[7];
Chris@42 374 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@42 375 TT = FNMS(Tl, Tk, Tj * Tm);
Chris@42 376 }
Chris@42 377 {
Chris@42 378 E Tp, Tr, To, Tq;
/* Input 7 with W[12], W[13]. */
Chris@42 379 Tp = cr[WS(rs, 7)];
Chris@42 380 Tr = ci[WS(rs, 7)];
Chris@42 381 To = W[12];
Chris@42 382 Tq = W[13];
Chris@42 383 Ts = FMA(To, Tp, Tq * Tr);
Chris@42 384 TU = FNMS(Tq, Tp, To * Tr);
Chris@42 385 }
/* Combine the three inputs of this group; Tu and T1w are the plain sums. */
Chris@42 386 Tt = Tn + Ts;
Chris@42 387 TY = TT + TU;
Chris@42 388 Tu = Ti + Tt;
Chris@42 389 T1w = TX + TY;
Chris@42 390 {
Chris@42 391 E TS, TV, TZ, T10;
Chris@42 392 TS = FNMS(KP500000000, Tt, Ti);
Chris@42 393 TV = KP866025403 * (TT - TU);
Chris@42 394 TW = TS - TV;
Chris@42 395 T1k = TS + TV;
Chris@42 396 TZ = FNMS(KP500000000, TY, TX);
Chris@42 397 T10 = KP866025403 * (Ts - Tn);
Chris@42 398 T11 = TZ - T10;
Chris@42 399 T1l = T10 + TZ;
Chris@42 400 }
Chris@42 401 }
/* Output group A: stores cr[0], cr[3] and ci[2]. */
Chris@42 402 {
Chris@42 403 E T1y, Td, TM, T1v;
Chris@42 404 T1y = KP866025403 * (T1w - T1x);
Chris@42 405 Td = T1 + Tc;
Chris@42 406 TM = Tu + TL;
Chris@42 407 T1v = FNMS(KP500000000, TM, Td);
Chris@42 408 cr[0] = Td + TM;
Chris@42 409 cr[WS(rs, 3)] = T1v + T1y;
Chris@42 410 ci[WS(rs, 2)] = T1v - T1y;
Chris@42 411 }
/* Output group B: stores cr[2], ci[6], ci[0], ci[3], cr[5] and cr[8]. */
Chris@42 412 {
Chris@42 413 E TR, T1I, T1e, T1K, T1i, T1H, T1f, T1J;
Chris@42 414 TR = TN - TQ;
Chris@42 415 T1I = T1D - T1A;
Chris@42 416 {
Chris@42 417 E T12, T1d, T1g, T1h;
Chris@42 418 T12 = FMA(KP173648177, TW, KP984807753 * T11);
Chris@42 419 T1d = FNMS(KP939692620, T1c, KP342020143 * T19);
Chris@42 420 T1e = T12 + T1d;
Chris@42 421 T1K = KP866025403 * (T1d - T12);
Chris@42 422 T1g = FNMS(KP984807753, TW, KP173648177 * T11);
Chris@42 423 T1h = FMA(KP342020143, T1c, KP939692620 * T19);
Chris@42 424 T1i = KP866025403 * (T1g + T1h);
Chris@42 425 T1H = T1g - T1h;
Chris@42 426 }
Chris@42 427 cr[WS(rs, 2)] = TR + T1e;
Chris@42 428 ci[WS(rs, 6)] = T1H + T1I;
Chris@42 429 T1f = FNMS(KP500000000, T1e, TR);
Chris@42 430 ci[0] = T1f - T1i;
Chris@42 431 ci[WS(rs, 3)] = T1f + T1i;
Chris@42 432 T1J = FMS(KP500000000, T1H, T1I);
Chris@42 433 cr[WS(rs, 5)] = T1J - T1K;
Chris@42 434 cr[WS(rs, 8)] = T1K + T1J;
Chris@42 435 }
/* Output group C: stores cr[6], ci[8] and ci[5]. */
Chris@42 436 {
Chris@42 437 E T1L, T1M, T1N, T1O;
Chris@42 438 T1L = KP866025403 * (TL - Tu);
Chris@42 439 T1M = T1C + T1B;
Chris@42 440 T1N = T1w + T1x;
Chris@42 441 T1O = FNMS(KP500000000, T1N, T1M);
Chris@42 442 cr[WS(rs, 6)] = T1L - T1O;
Chris@42 443 ci[WS(rs, 8)] = T1N + T1M;
Chris@42 444 ci[WS(rs, 5)] = T1L + T1O;
Chris@42 445 }
/* Output group D: stores cr[1], ci[1], cr[4], ci[7], cr[7] and ci[4]. */
Chris@42 446 {
Chris@42 447 E T1j, T1E, T1q, T1z, T1u, T1F, T1r, T1G;
Chris@42 448 T1j = TN + TQ;
Chris@42 449 T1E = T1A + T1D;
Chris@42 450 {
Chris@42 451 E T1m, T1p, T1s, T1t;
Chris@42 452 T1m = FMA(KP766044443, T1k, KP642787609 * T1l);
Chris@42 453 T1p = FMA(KP173648177, T1n, KP984807753 * T1o);
Chris@42 454 T1q = T1m + T1p;
Chris@42 455 T1z = KP866025403 * (T1p - T1m);
Chris@42 456 T1s = FNMS(KP642787609, T1k, KP766044443 * T1l);
Chris@42 457 T1t = FNMS(KP984807753, T1n, KP173648177 * T1o);
Chris@42 458 T1u = KP866025403 * (T1s - T1t);
Chris@42 459 T1F = T1s + T1t;
Chris@42 460 }
Chris@42 461 cr[WS(rs, 1)] = T1j + T1q;
Chris@42 462 T1r = FNMS(KP500000000, T1q, T1j);
Chris@42 463 ci[WS(rs, 1)] = T1r - T1u;
Chris@42 464 cr[WS(rs, 4)] = T1r + T1u;
Chris@42 465 ci[WS(rs, 7)] = T1F + T1E;
Chris@42 466 T1G = FNMS(KP500000000, T1F, T1E);
Chris@42 467 cr[WS(rs, 7)] = T1z - T1G;
Chris@42 468 ci[WS(rs, 4)] = T1z + T1G;
Chris@42 469 }
Chris@42 470 }
Chris@42 471 }
Chris@42 472 }
Chris@42 473
/* Twiddle instruction table referenced by desc in this non-FMA branch.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined outside this file;
 * presumably the first entry requests the full twiddle set for size 9 and
 * the TW_NEXT entry ends the list -- confirm against the twiddle headers. */
Chris@42 474 static const tw_instr twinstr[] = {
Chris@42 475 {TW_FULL, 1, 9},
Chris@42 476 {TW_NEXT, 1, 0}
Chris@42 477 };
Chris@42 478
/* Descriptor handed to the registration call below: size 9, the name string
 * "hf_9", the twinstr table and &GENUS. The counts {60, 36, 36, ...} match the
 * additions / multiplications / fused multiply-adds quoted in the generator's
 * summary comment for this variant; the meaning of the trailing 0 is not
 * visible in this file. */
Chris@42 479 static const hc2hc_desc desc = { 9, "hf_9", twinstr, &GENUS, {60, 36, 36, 0} };
Chris@42 480
/* Registration entry point (non-FMA branch): passes the planner p, the hf_9
 * kernel and its descriptor to khc2hc_register.
 * NOTE(review): X(...) is a project macro, presumably a symbol-name
 * prefixing wrapper -- confirm. */
Chris@42 481 void X(codelet_hf_9) (planner *p) {
Chris@42 482 X(khc2hc_register) (p, hf_9, &desc);
Chris@42 483 }
Chris@42 484 #endif /* HAVE_FMA */