annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:32 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -dif -name hb_9 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 96 FP additions, 88 FP multiplications,
Chris@82 32 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
Chris@82 33 * 53 stack variables, 10 constants, and 36 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb_9 (FMA variant): body of the size-9 hc2hc codelet emitted by genfft's
 * gen_hc2hc with "-fma -n 9 -dif -sign 1" (see the "Generated by" comment
 * above).  It is the variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) one loop iteration loads slots 0..8 of cr and
 * slots 0..8 of ci (addressed through WS(rs, k)), combines them, and stores
 * all eighteen slots back in place.  All loads of an iteration precede its
 * first store.  Slot 0 is stored without a twiddle factor; slot k (1..8) is
 * stored after being combined with the pair W[2k-2], W[2k-1].
 *
 * Parameters:
 *   cr, ci - data arrays, updated in place; after every iteration cr is
 *            advanced by +ms and ci by -ms.
 *   W      - twiddle table; offset by (mb - 1) * 16 when the loop starts and
 *            advanced by 16 entries per iteration.
 *   rs     - stride handed to WS() to address the nine slots of each array.
 *   mb, me - half-open range of loop iterations.
 *   ms     - per-iteration pointer step applied to cr and ci.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  Presumably
 * FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and FMS(a,b,c) = a*b - c;
 * confirm against the included FFTW headers.
 */
Chris@82 37 static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP954188894, +0.954188894138671133499268364187245676532219158);
Chris@82 40 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@82 41 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 42 DK(KP492403876, +0.492403876506104029683371512294761506835321626);
Chris@82 43 DK(KP777861913, +0.777861913430206160028177977318626690410586096);
Chris@82 44 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@82 45 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@82 46 DK(KP363970234, +0.363970234266202361351047882776834043890471784);
Chris@82 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 49 {
Chris@82 50 INT m;
/* One iteration per m; the comma expressions step cr, ci and W together. */
Chris@82 51 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@82 52 E T5, Tl, TQ, T1y, T1b, T1J, Tg, TE, Tw, Tz, T1E, T1L, T1B, T1K, T14;
Chris@82 53 E T1d, TX, T1c;
/* Load cr[0], cr[3], cr[6] and ci[8], ci[2], ci[5]; form their sums and differences. */
Chris@82 54 {
Chris@82 55 E T1, Th, T4, T1a, Tk, TP, TO, T19;
Chris@82 56 T1 = cr[0];
Chris@82 57 Th = ci[WS(rs, 8)];
Chris@82 58 {
Chris@82 59 E T2, T3, Ti, Tj;
Chris@82 60 T2 = cr[WS(rs, 3)];
Chris@82 61 T3 = ci[WS(rs, 2)];
Chris@82 62 T4 = T2 + T3;
Chris@82 63 T1a = T2 - T3;
Chris@82 64 Ti = ci[WS(rs, 5)];
Chris@82 65 Tj = cr[WS(rs, 6)];
Chris@82 66 Tk = Ti - Tj;
Chris@82 67 TP = Ti + Tj;
Chris@82 68 }
Chris@82 69 T5 = T1 + T4;
Chris@82 70 Tl = Th + Tk;
Chris@82 71 TO = FNMS(KP500000000, T4, T1);
Chris@82 72 TQ = FNMS(KP866025403, TP, TO);
Chris@82 73 T1y = FMA(KP866025403, TP, TO);
Chris@82 74 T19 = FNMS(KP500000000, Tk, Th);
Chris@82 75 T1b = FMA(KP866025403, T1a, T19);
Chris@82 76 T1J = FNMS(KP866025403, T1a, T19);
Chris@82 77 }
/* Load the remaining twelve inputs (cr slots 1, 4, 7, 2, 5, 8 and ci slots 1, 7, 4, 3, 0, 6) and build the intermediate terms consumed by the stores below. */
Chris@82 78 {
Chris@82 79 E T6, T9, TY, T12, Tm, Tp, TZ, T11, Tb, Te, TS, TU, Tr, Tu, TR;
Chris@82 80 E TV;
Chris@82 81 {
Chris@82 82 E T7, T8, Tn, To;
Chris@82 83 T6 = cr[WS(rs, 1)];
Chris@82 84 T7 = cr[WS(rs, 4)];
Chris@82 85 T8 = ci[WS(rs, 1)];
Chris@82 86 T9 = T7 + T8;
Chris@82 87 TY = FNMS(KP500000000, T9, T6);
Chris@82 88 T12 = T7 - T8;
Chris@82 89 Tm = ci[WS(rs, 7)];
Chris@82 90 Tn = ci[WS(rs, 4)];
Chris@82 91 To = cr[WS(rs, 7)];
Chris@82 92 Tp = Tn - To;
Chris@82 93 TZ = Tn + To;
Chris@82 94 T11 = FMS(KP500000000, Tp, Tm);
Chris@82 95 }
Chris@82 96 {
Chris@82 97 E Tc, Td, Ts, Tt;
Chris@82 98 Tb = cr[WS(rs, 2)];
Chris@82 99 Tc = ci[WS(rs, 3)];
Chris@82 100 Td = ci[0];
Chris@82 101 Te = Tc + Td;
Chris@82 102 TS = Td - Tc;
Chris@82 103 TU = FNMS(KP500000000, Te, Tb);
Chris@82 104 Tr = ci[WS(rs, 6)];
Chris@82 105 Ts = cr[WS(rs, 5)];
Chris@82 106 Tt = cr[WS(rs, 8)];
Chris@82 107 Tu = Ts + Tt;
Chris@82 108 TR = FMA(KP500000000, Tu, Tr);
Chris@82 109 TV = Ts - Tt;
Chris@82 110 }
Chris@82 111 {
Chris@82 112 E Ta, Tf, T1z, T1A;
Chris@82 113 Ta = T6 + T9;
Chris@82 114 Tf = Tb + Te;
Chris@82 115 Tg = Ta + Tf;
Chris@82 116 TE = Ta - Tf;
Chris@82 117 {
Chris@82 118 E Tq, Tv, T1C, T1D;
Chris@82 119 Tq = Tm + Tp;
Chris@82 120 Tv = Tr - Tu;
Chris@82 121 Tw = Tq + Tv;
Chris@82 122 Tz = Tv - Tq;
Chris@82 123 T1C = FNMS(KP866025403, TV, TU);
Chris@82 124 T1D = FMA(KP866025403, TS, TR);
Chris@82 125 T1E = FMA(KP363970234, T1D, T1C);
Chris@82 126 T1L = FNMS(KP363970234, T1C, T1D);
Chris@82 127 }
Chris@82 128 T1z = FMA(KP866025403, T12, T11);
Chris@82 129 T1A = FMA(KP866025403, TZ, TY);
Chris@82 130 T1B = FMA(KP176326980, T1A, T1z);
Chris@82 131 T1K = FNMS(KP176326980, T1z, T1A);
Chris@82 132 {
Chris@82 133 E T10, T13, TT, TW;
Chris@82 134 T10 = FNMS(KP866025403, TZ, TY);
Chris@82 135 T13 = FNMS(KP866025403, T12, T11);
Chris@82 136 T14 = FMA(KP839099631, T13, T10);
Chris@82 137 T1d = FNMS(KP839099631, T10, T13);
Chris@82 138 TT = FNMS(KP866025403, TS, TR);
Chris@82 139 TW = FMA(KP866025403, TV, TU);
Chris@82 140 TX = FNMS(KP176326980, TW, TT);
Chris@82 141 T1c = FMA(KP176326980, TT, TW);
Chris@82 142 }
Chris@82 143 }
Chris@82 144 }
/* Slot 0 is stored without a twiddle factor. */
Chris@82 145 cr[0] = T5 + Tg;
Chris@82 146 ci[0] = Tl + Tw;
/* Slots 6 and 3, combined with W[10], W[11] and W[4], W[5] respectively. */
Chris@82 147 {
Chris@82 148 E TA, TI, TF, TL, Ty, TD;
Chris@82 149 Ty = FNMS(KP500000000, Tg, T5);
Chris@82 150 TA = FNMS(KP866025403, Tz, Ty);
Chris@82 151 TI = FMA(KP866025403, Tz, Ty);
Chris@82 152 TD = FNMS(KP500000000, Tw, Tl);
Chris@82 153 TF = FNMS(KP866025403, TE, TD);
Chris@82 154 TL = FMA(KP866025403, TE, TD);
Chris@82 155 {
Chris@82 156 E TB, TG, Tx, TC;
Chris@82 157 Tx = W[10];
Chris@82 158 TB = Tx * TA;
Chris@82 159 TG = Tx * TF;
Chris@82 160 TC = W[11];
Chris@82 161 cr[WS(rs, 6)] = FNMS(TC, TF, TB);
Chris@82 162 ci[WS(rs, 6)] = FMA(TC, TA, TG);
Chris@82 163 }
Chris@82 164 {
Chris@82 165 E TJ, TM, TH, TK;
Chris@82 166 TH = W[4];
Chris@82 167 TJ = TH * TI;
Chris@82 168 TM = TH * TL;
Chris@82 169 TK = W[5];
Chris@82 170 cr[WS(rs, 3)] = FNMS(TK, TL, TJ);
Chris@82 171 ci[WS(rs, 3)] = FMA(TK, TI, TM);
Chris@82 172 }
Chris@82 173 }
/* Slots 1, 4 and 7, combined with W[0], W[1]; W[6], W[7]; and W[12], W[13]. */
Chris@82 174 {
Chris@82 175 E T16, T1s, T1k, T1f, T1v, T1p;
Chris@82 176 {
Chris@82 177 E T1j, T15, T1i, T1o, T1e, T1n;
Chris@82 178 T1j = FMA(KP777861913, T1d, T1c);
Chris@82 179 T15 = FNMS(KP777861913, T14, TX);
Chris@82 180 T1i = FMA(KP492403876, T15, TQ);
Chris@82 181 T16 = FNMS(KP984807753, T15, TQ);
Chris@82 182 T1s = FMA(KP852868531, T1j, T1i);
Chris@82 183 T1k = FNMS(KP852868531, T1j, T1i);
Chris@82 184 T1o = FMA(KP777861913, T14, TX);
Chris@82 185 T1e = FNMS(KP777861913, T1d, T1c);
Chris@82 186 T1n = FNMS(KP492403876, T1e, T1b);
Chris@82 187 T1f = FMA(KP984807753, T1e, T1b);
Chris@82 188 T1v = FMA(KP852868531, T1o, T1n);
Chris@82 189 T1p = FNMS(KP852868531, T1o, T1n);
Chris@82 190 }
Chris@82 191 {
Chris@82 192 E TN, T17, T18, T1g;
Chris@82 193 TN = W[0];
Chris@82 194 T17 = TN * T16;
Chris@82 195 T18 = W[1];
Chris@82 196 T1g = T18 * T16;
Chris@82 197 cr[WS(rs, 1)] = FNMS(T18, T1f, T17);
Chris@82 198 ci[WS(rs, 1)] = FMA(TN, T1f, T1g);
Chris@82 199 }
Chris@82 200 {
Chris@82 201 E T1t, T1w, T1r, T1u;
Chris@82 202 T1r = W[6];
Chris@82 203 T1t = T1r * T1s;
Chris@82 204 T1w = T1r * T1v;
Chris@82 205 T1u = W[7];
Chris@82 206 cr[WS(rs, 4)] = FNMS(T1u, T1v, T1t);
Chris@82 207 ci[WS(rs, 4)] = FMA(T1u, T1s, T1w);
Chris@82 208 }
Chris@82 209 {
Chris@82 210 E T1l, T1q, T1h, T1m;
Chris@82 211 T1h = W[12];
Chris@82 212 T1l = T1h * T1k;
Chris@82 213 T1q = T1h * T1p;
Chris@82 214 T1m = W[13];
Chris@82 215 cr[WS(rs, 7)] = FNMS(T1m, T1p, T1l);
Chris@82 216 ci[WS(rs, 7)] = FMA(T1m, T1k, T1q);
Chris@82 217 }
Chris@82 218 }
/* Slots 2, 8 and 5, combined with W[2], W[3]; W[14], W[15]; and W[8], W[9]. */
Chris@82 219 {
Chris@82 220 E T1W, T1N, T1V, T1G, T20, T1S;
Chris@82 221 T1W = FMA(KP954188894, T1E, T1B);
Chris@82 222 {
Chris@82 223 E T1M, T1R, T1F, T1Q;
Chris@82 224 T1M = FNMS(KP954188894, T1L, T1K);
Chris@82 225 T1N = FMA(KP984807753, T1M, T1J);
Chris@82 226 T1V = FNMS(KP492403876, T1M, T1J);
Chris@82 227 T1R = FMA(KP954188894, T1L, T1K);
Chris@82 228 T1F = FNMS(KP954188894, T1E, T1B);
Chris@82 229 T1Q = FNMS(KP492403876, T1F, T1y);
Chris@82 230 T1G = FMA(KP984807753, T1F, T1y);
Chris@82 231 T20 = FMA(KP852868531, T1R, T1Q);
Chris@82 232 T1S = FNMS(KP852868531, T1R, T1Q);
Chris@82 233 }
Chris@82 234 {
Chris@82 235 E T1H, T1O, T1x, T1I;
Chris@82 236 T1x = W[2];
Chris@82 237 T1H = T1x * T1G;
Chris@82 238 T1O = T1x * T1N;
Chris@82 239 T1I = W[3];
Chris@82 240 cr[WS(rs, 2)] = FNMS(T1I, T1N, T1H);
Chris@82 241 ci[WS(rs, 2)] = FMA(T1I, T1G, T1O);
Chris@82 242 }
Chris@82 243 {
Chris@82 244 E T23, T22, T24, T1Z, T21;
Chris@82 245 T23 = FNMS(KP852868531, T1W, T1V);
Chris@82 246 T22 = W[15];
Chris@82 247 T24 = T22 * T20;
Chris@82 248 T1Z = W[14];
Chris@82 249 T21 = T1Z * T20;
Chris@82 250 cr[WS(rs, 8)] = FNMS(T22, T23, T21);
Chris@82 251 ci[WS(rs, 8)] = FMA(T1Z, T23, T24);
Chris@82 252 }
Chris@82 253 {
Chris@82 254 E T1X, T1U, T1Y, T1P, T1T;
Chris@82 255 T1X = FMA(KP852868531, T1W, T1V);
Chris@82 256 T1U = W[9];
Chris@82 257 T1Y = T1U * T1S;
Chris@82 258 T1P = W[8];
Chris@82 259 T1T = T1P * T1S;
Chris@82 260 cr[WS(rs, 5)] = FNMS(T1U, T1X, T1T);
Chris@82 261 ci[WS(rs, 5)] = FMA(T1P, T1X, T1Y);
Chris@82 262 }
Chris@82 263 }
Chris@82 264 }
Chris@82 265 }
Chris@82 266 }
Chris@82 267
/*
 * Twiddle-instruction table referenced by the codelet descriptor `desc`.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this file;
 * presumably this requests the full twiddle set for a size-9 transform,
 * i.e. the 16 W entries that hb_9 consumes per iteration -- confirm against
 * the FFTW headers.
 */
Chris@82 268 static const tw_instr twinstr[] = {
Chris@82 269 {TW_FULL, 1, 9},
Chris@82 270 {TW_NEXT, 1, 0}
Chris@82 271 };
Chris@82 272
/*
 * Descriptor passed to the registration call for the FMA variant: the size 9,
 * the name "hb_9", the twiddle table, &GENUS, and the counts {24, 16, 72, 0}.
 * The first three counts match the additions / multiplications / fused
 * multiply-adds quoted in this variant's generated header comment.
 * NOTE(review): the field names of hc2hc_desc and the origin of GENUS are not
 * visible in this file (GENUS presumably comes from rdft/scalar/hb.h).
 */
Chris@82 273 static const hc2hc_desc desc = { 9, "hb_9", twinstr, &GENUS, {24, 16, 72, 0} };
Chris@82 274
/*
 * Registration entry point: hands hb_9 and its descriptor to planner p via
 * X(khc2hc_register).
 * NOTE(review): X() is a name-wrapping macro defined outside this file
 * (presumably FFTW's precision-dependent symbol prefix) -- confirm.
 */
Chris@82 275 void X(codelet_hb_9) (planner *p) {
Chris@82 276 X(khc2hc_register) (p, hb_9, &desc);
Chris@82 277 }
Chris@82 278 #else
Chris@82 279
Chris@82 280 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -dif -name hb_9 -include rdft/scalar/hb.h */
Chris@82 281
Chris@82 282 /*
Chris@82 283 * This function contains 96 FP additions, 72 FP multiplications,
Chris@82 284 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
Chris@82 285 * 53 stack variables, 8 constants, and 36 memory accesses
Chris@82 286 */
Chris@82 287 #include "rdft/scalar/hb.h"
Chris@82 288
/*
 * hb_9 (non-FMA variant): body of the size-9 hc2hc codelet emitted by
 * genfft's gen_hc2hc with "-n 9 -dif -sign 1" (see the "Generated by"
 * comment above).  It is the variant compiled when neither ARCH_PREFERS_FMA
 * nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) one loop iteration loads slots 0..8 of cr and
 * slots 0..8 of ci (addressed through WS(rs, k)), combines them, and stores
 * all eighteen slots back in place.  All loads of an iteration precede its
 * first store.  Slot 0 is stored without a twiddle factor; slot k (1..8) is
 * stored after being combined with the pair W[2k-2], W[2k-1].
 *
 * Parameters:
 *   cr, ci - data arrays, updated in place; after every iteration cr is
 *            advanced by +ms and ci by -ms.
 *   W      - twiddle table; offset by (mb - 1) * 16 when the loop starts and
 *            advanced by 16 entries per iteration.
 *   rs     - stride handed to WS() to address the nine slots of each array.
 *   mb, me - half-open range of loop iterations.
 *   ms     - per-iteration pointer step applied to cr and ci.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  Presumably
 * FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b; confirm against the
 * included FFTW headers.
 */
Chris@82 289 static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 290 {
Chris@82 291 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 292 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@82 293 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@82 294 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@82 295 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@82 296 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@82 297 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 298 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 299 {
Chris@82 300 INT m;
/* One iteration per m; the comma expressions step cr, ci and W together. */
Chris@82 301 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@82 302 E T5, Tl, TM, T1o, T16, T1y, Ta, Tf, Tg, Tq, Tv, Tw, TT, T17, T1u;
Chris@82 303 E T1A, T1r, T1z, T10, T18;
/* Load cr[0], cr[3], cr[6] and ci[8], ci[2], ci[5]; form their sums and differences. */
Chris@82 304 {
Chris@82 305 E T1, Th, T4, T14, Tk, TL, TK, T15;
Chris@82 306 T1 = cr[0];
Chris@82 307 Th = ci[WS(rs, 8)];
Chris@82 308 {
Chris@82 309 E T2, T3, Ti, Tj;
Chris@82 310 T2 = cr[WS(rs, 3)];
Chris@82 311 T3 = ci[WS(rs, 2)];
Chris@82 312 T4 = T2 + T3;
Chris@82 313 T14 = KP866025403 * (T2 - T3);
Chris@82 314 Ti = ci[WS(rs, 5)];
Chris@82 315 Tj = cr[WS(rs, 6)];
Chris@82 316 Tk = Ti - Tj;
Chris@82 317 TL = KP866025403 * (Ti + Tj);
Chris@82 318 }
Chris@82 319 T5 = T1 + T4;
Chris@82 320 Tl = Th + Tk;
Chris@82 321 TK = FNMS(KP500000000, T4, T1);
Chris@82 322 TM = TK - TL;
Chris@82 323 T1o = TK + TL;
Chris@82 324 T15 = FNMS(KP500000000, Tk, Th);
Chris@82 325 T16 = T14 + T15;
Chris@82 326 T1y = T15 - T14;
Chris@82 327 }
/* Load the remaining twelve inputs (cr slots 1, 4, 7, 2, 5, 8 and ci slots 1, 7, 4, 3, 0, 6) and build the intermediate terms consumed by the stores below. */
Chris@82 328 {
Chris@82 329 E T6, T9, TN, TQ, Tm, Tp, TO, TR, Tb, Te, TU, TX, Tr, Tu, TV;
Chris@82 330 E TY;
Chris@82 331 {
Chris@82 332 E T7, T8, Tn, To;
Chris@82 333 T6 = cr[WS(rs, 1)];
Chris@82 334 T7 = cr[WS(rs, 4)];
Chris@82 335 T8 = ci[WS(rs, 1)];
Chris@82 336 T9 = T7 + T8;
Chris@82 337 TN = FNMS(KP500000000, T9, T6);
Chris@82 338 TQ = KP866025403 * (T7 - T8);
Chris@82 339 Tm = ci[WS(rs, 7)];
Chris@82 340 Tn = ci[WS(rs, 4)];
Chris@82 341 To = cr[WS(rs, 7)];
Chris@82 342 Tp = Tn - To;
Chris@82 343 TO = KP866025403 * (Tn + To);
Chris@82 344 TR = FNMS(KP500000000, Tp, Tm);
Chris@82 345 }
Chris@82 346 {
Chris@82 347 E Tc, Td, Ts, Tt;
Chris@82 348 Tb = cr[WS(rs, 2)];
Chris@82 349 Tc = ci[WS(rs, 3)];
Chris@82 350 Td = ci[0];
Chris@82 351 Te = Tc + Td;
Chris@82 352 TU = FNMS(KP500000000, Te, Tb);
Chris@82 353 TX = KP866025403 * (Tc - Td);
Chris@82 354 Tr = ci[WS(rs, 6)];
Chris@82 355 Ts = cr[WS(rs, 5)];
Chris@82 356 Tt = cr[WS(rs, 8)];
Chris@82 357 Tu = Ts + Tt;
Chris@82 358 TV = KP866025403 * (Ts - Tt);
Chris@82 359 TY = FMA(KP500000000, Tu, Tr);
Chris@82 360 }
Chris@82 361 {
Chris@82 362 E TP, TS, T1s, T1t;
Chris@82 363 Ta = T6 + T9;
Chris@82 364 Tf = Tb + Te;
Chris@82 365 Tg = Ta + Tf;
Chris@82 366 Tq = Tm + Tp;
Chris@82 367 Tv = Tr - Tu;
Chris@82 368 Tw = Tq + Tv;
Chris@82 369 TP = TN - TO;
Chris@82 370 TS = TQ + TR;
Chris@82 371 TT = FNMS(KP642787609, TS, KP766044443 * TP);
Chris@82 372 T17 = FMA(KP766044443, TS, KP642787609 * TP);
Chris@82 373 T1s = TU - TV;
Chris@82 374 T1t = TY - TX;
Chris@82 375 T1u = FMA(KP939692620, T1s, KP342020143 * T1t);
Chris@82 376 T1A = FNMS(KP939692620, T1t, KP342020143 * T1s);
Chris@82 377 {
Chris@82 378 E T1p, T1q, TW, TZ;
Chris@82 379 T1p = TN + TO;
Chris@82 380 T1q = TR - TQ;
Chris@82 381 T1r = FNMS(KP984807753, T1q, KP173648177 * T1p);
Chris@82 382 T1z = FMA(KP173648177, T1q, KP984807753 * T1p);
Chris@82 383 TW = TU + TV;
Chris@82 384 TZ = TX + TY;
Chris@82 385 T10 = FNMS(KP984807753, TZ, KP173648177 * TW);
Chris@82 386 T18 = FMA(KP984807753, TW, KP173648177 * TZ);
Chris@82 387 }
Chris@82 388 }
Chris@82 389 }
/* Slot 0 is stored without a twiddle factor. */
Chris@82 390 cr[0] = T5 + Tg;
Chris@82 391 ci[0] = Tl + Tw;
/* Slots 6 and 3, combined with W[10], W[11] and W[4], W[5] respectively. */
Chris@82 392 {
Chris@82 393 E TA, TG, TE, TI;
Chris@82 394 {
Chris@82 395 E Ty, Tz, TC, TD;
Chris@82 396 Ty = FNMS(KP500000000, Tg, T5);
Chris@82 397 Tz = KP866025403 * (Tv - Tq);
Chris@82 398 TA = Ty - Tz;
Chris@82 399 TG = Ty + Tz;
Chris@82 400 TC = FNMS(KP500000000, Tw, Tl);
Chris@82 401 TD = KP866025403 * (Ta - Tf);
Chris@82 402 TE = TC - TD;
Chris@82 403 TI = TD + TC;
Chris@82 404 }
Chris@82 405 {
Chris@82 406 E Tx, TB, TF, TH;
Chris@82 407 Tx = W[10];
Chris@82 408 TB = W[11];
Chris@82 409 cr[WS(rs, 6)] = FNMS(TB, TE, Tx * TA);
Chris@82 410 ci[WS(rs, 6)] = FMA(Tx, TE, TB * TA);
Chris@82 411 TF = W[4];
Chris@82 412 TH = W[5];
Chris@82 413 cr[WS(rs, 3)] = FNMS(TH, TI, TF * TG);
Chris@82 414 ci[WS(rs, 3)] = FMA(TF, TI, TH * TG);
Chris@82 415 }
Chris@82 416 }
/* Slots 1, 4 and 7, combined with W[0], W[1]; W[6], W[7]; and W[12], W[13]. */
Chris@82 417 {
Chris@82 418 E T1d, T1h, T12, T1c, T1a, T1g, T11, T19, TJ, T13;
Chris@82 419 T1d = KP866025403 * (T18 - T17);
Chris@82 420 T1h = KP866025403 * (TT - T10);
Chris@82 421 T11 = TT + T10;
Chris@82 422 T12 = TM + T11;
Chris@82 423 T1c = FNMS(KP500000000, T11, TM);
Chris@82 424 T19 = T17 + T18;
Chris@82 425 T1a = T16 + T19;
Chris@82 426 T1g = FNMS(KP500000000, T19, T16);
Chris@82 427 TJ = W[0];
Chris@82 428 T13 = W[1];
Chris@82 429 cr[WS(rs, 1)] = FNMS(T13, T1a, TJ * T12);
Chris@82 430 ci[WS(rs, 1)] = FMA(T13, T12, TJ * T1a);
Chris@82 431 {
Chris@82 432 E T1k, T1m, T1j, T1l;
Chris@82 433 T1k = T1c + T1d;
Chris@82 434 T1m = T1h + T1g;
Chris@82 435 T1j = W[6];
Chris@82 436 T1l = W[7];
Chris@82 437 cr[WS(rs, 4)] = FNMS(T1l, T1m, T1j * T1k);
Chris@82 438 ci[WS(rs, 4)] = FMA(T1j, T1m, T1l * T1k);
Chris@82 439 }
Chris@82 440 {
Chris@82 441 E T1e, T1i, T1b, T1f;
Chris@82 442 T1e = T1c - T1d;
Chris@82 443 T1i = T1g - T1h;
Chris@82 444 T1b = W[12];
Chris@82 445 T1f = W[13];
Chris@82 446 cr[WS(rs, 7)] = FNMS(T1f, T1i, T1b * T1e);
Chris@82 447 ci[WS(rs, 7)] = FMA(T1b, T1i, T1f * T1e);
Chris@82 448 }
Chris@82 449 }
/* Slots 2, 5 and 8, combined with W[2], W[3]; W[8], W[9]; and W[14], W[15]. */
Chris@82 450 {
Chris@82 451 E T1F, T1J, T1w, T1E, T1C, T1I, T1v, T1B, T1n, T1x;
Chris@82 452 T1F = KP866025403 * (T1A - T1z);
Chris@82 453 T1J = KP866025403 * (T1r + T1u);
Chris@82 454 T1v = T1r - T1u;
Chris@82 455 T1w = T1o + T1v;
Chris@82 456 T1E = FNMS(KP500000000, T1v, T1o);
Chris@82 457 T1B = T1z + T1A;
Chris@82 458 T1C = T1y + T1B;
Chris@82 459 T1I = FNMS(KP500000000, T1B, T1y);
Chris@82 460 T1n = W[2];
Chris@82 461 T1x = W[3];
Chris@82 462 cr[WS(rs, 2)] = FNMS(T1x, T1C, T1n * T1w);
Chris@82 463 ci[WS(rs, 2)] = FMA(T1n, T1C, T1x * T1w);
Chris@82 464 {
Chris@82 465 E T1M, T1O, T1L, T1N;
Chris@82 466 T1M = T1F + T1E;
Chris@82 467 T1O = T1I + T1J;
Chris@82 468 T1L = W[8];
Chris@82 469 T1N = W[9];
Chris@82 470 cr[WS(rs, 5)] = FNMS(T1N, T1O, T1L * T1M);
Chris@82 471 ci[WS(rs, 5)] = FMA(T1N, T1M, T1L * T1O);
Chris@82 472 }
Chris@82 473 {
Chris@82 474 E T1G, T1K, T1D, T1H;
Chris@82 475 T1G = T1E - T1F;
Chris@82 476 T1K = T1I - T1J;
Chris@82 477 T1D = W[14];
Chris@82 478 T1H = W[15];
Chris@82 479 cr[WS(rs, 8)] = FNMS(T1H, T1K, T1D * T1G);
Chris@82 480 ci[WS(rs, 8)] = FMA(T1H, T1G, T1D * T1K);
Chris@82 481 }
Chris@82 482 }
Chris@82 483 }
Chris@82 484 }
Chris@82 485 }
Chris@82 486
/*
 * Twiddle-instruction table referenced by the codelet descriptor `desc`.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this file;
 * presumably this requests the full twiddle set for a size-9 transform,
 * i.e. the 16 W entries that hb_9 consumes per iteration -- confirm against
 * the FFTW headers.
 */
Chris@82 487 static const tw_instr twinstr[] = {
Chris@82 488 {TW_FULL, 1, 9},
Chris@82 489 {TW_NEXT, 1, 0}
Chris@82 490 };
Chris@82 491
/*
 * Descriptor passed to the registration call for the non-FMA variant: the
 * size 9, the name "hb_9", the twiddle table, &GENUS, and the counts
 * {60, 36, 36, 0}.  The first three counts match the additions /
 * multiplications / fused multiply-adds quoted in this variant's generated
 * header comment.
 * NOTE(review): the field names of hc2hc_desc and the origin of GENUS are not
 * visible in this file (GENUS presumably comes from rdft/scalar/hb.h).
 */
Chris@82 492 static const hc2hc_desc desc = { 9, "hb_9", twinstr, &GENUS, {60, 36, 36, 0} };
Chris@82 493
/*
 * Registration entry point: hands hb_9 and its descriptor to planner p via
 * X(khc2hc_register).
 * NOTE(review): X() is a name-wrapping macro defined outside this file
 * (presumably FFTW's precision-dependent symbol prefix) -- confirm.
 */
Chris@82 494 void X(codelet_hb_9) (planner *p) {
Chris@82 495 X(khc2hc_register) (p, hb_9, &desc);
Chris@82 496 }
Chris@82 497 #endif