annotate src/fftw-3.3.8/rdft/scalar/r2cf/hf_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:29 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -n 9 -dit -name hf_9 -include rdft/scalar/hf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 96 FP additions, 88 FP multiplications,
Chris@82 32 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
Chris@82 33 * 55 stack variables, 10 constants, and 36 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hf.h"
Chris@82 36
/*
 * hf_9 (FMA variant): size-9 twiddle codelet emitted by genfft's gen_hc2hc
 * (see the "Generated by" line above: -fma -n 9 -dit). This version is
 * compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) the body
 *   1. loads cr[0], ci[0] and cr/ci[WS(rs, k)] for k = 1..8 (18 loads),
 *   2. combines each pair (cr[k], ci[k]), k = 1..8, with the twiddle pair
 *      (W[2k-2], W[2k-1]),
 *   3. runs a straight-line size-9 butterfly on the results, and
 *   4. stores 18 results back to cr/ci at indices 0..8 (in place).
 * Within one iteration all loads precede all stores.
 *
 * Parameters:
 *   cr, ci - data arrays, updated in place; cr is advanced by ms and ci is
 *            moved back by ms after every iteration.
 *   W      - twiddle table; rebased to W + (mb - 1) * 16 on entry and
 *            advanced by 16 values per iteration.
 *   rs     - stride handed to WS() to address the 9 elements.
 *   mb, me - half-open iteration range.
 *   ms     - per-iteration step applied to cr (+ms) and ci (-ms).
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE come from the included headers and are not visible
 * here. The comments below assume FMA(a, b, c) = a*b + c,
 * FNMS(a, b, c) = c - a*b and FMS(a, b, c) = a*b - c -- confirm against
 * the header definitions.
 */
Chris@82 37 static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/*
 * Constants (names encode the leading digits). Numerically:
 *   KP500000000 = 1/2, KP866025403 ~ sqrt(3)/2,
 *   KP984807753 ~ cos(10 deg), KP492403876 is half of KP984807753,
 *   KP852868531 ~ KP984807753 * KP866025403,
 *   KP176326980 ~ tan(10 deg), KP363970234 ~ tan(20 deg),
 *   KP839099631 ~ tan(40 deg),
 *   KP954188894 ~ cos(20 deg)/cos(10 deg),
 *   KP777861913 ~ cos(40 deg)/cos(10 deg).
 */
Chris@82 39 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@82 40 DK(KP492403876, +0.492403876506104029683371512294761506835321626);
Chris@82 41 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 42 DK(KP777861913, +0.777861913430206160028177977318626690410586096);
Chris@82 43 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@82 44 DK(KP954188894, +0.954188894138671133499268364187245676532219158);
Chris@82 45 DK(KP363970234, +0.363970234266202361351047882776834043890471784);
Chris@82 46 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@82 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 49 {
Chris@82 50 INT m;
/*
 * W is rebased once in the init clause; the increment clause steps m, cr,
 * ci and W together. NOTE(review): MAKE_VOLATILE_STRIDE is an external
 * macro whose effect is not visible here.
 */
Chris@82 51 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@82 52 E T1, T1P, Te, T1S, T10, T1Q, T1a, T1d, Ty, T18, Tl, T13, T19, T1c, T1l;
Chris@82 53 E T1r, TS, T1p, TF, T1o, T1g, T1q;
/* Element 0 is used as loaded; no twiddle is applied to it. */
Chris@82 54 T1 = cr[0];
Chris@82 55 T1P = ci[0];
/*
 * Elements 3 and 6: combine with twiddles W[4..5] and W[10..11], then form
 * sums (Te, T1Q) and differences (T1S, T10) of the two results.
 */
Chris@82 56 {
Chris@82 57 E T3, T6, T4, TW, T9, Tc, Ta, TY, T2, T8;
Chris@82 58 T3 = cr[WS(rs, 3)];
Chris@82 59 T6 = ci[WS(rs, 3)];
Chris@82 60 T2 = W[4];
Chris@82 61 T4 = T2 * T3;
Chris@82 62 TW = T2 * T6;
Chris@82 63 T9 = cr[WS(rs, 6)];
Chris@82 64 Tc = ci[WS(rs, 6)];
Chris@82 65 T8 = W[10];
Chris@82 66 Ta = T8 * T9;
Chris@82 67 TY = T8 * Tc;
Chris@82 68 {
Chris@82 69 E T7, TX, Td, TZ, T5, Tb;
Chris@82 70 T5 = W[5];
Chris@82 71 T7 = FMA(T5, T6, T4);
Chris@82 72 TX = FNMS(T5, T3, TW);
Chris@82 73 Tb = W[11];
Chris@82 74 Td = FMA(Tb, Tc, Ta);
Chris@82 75 TZ = FNMS(Tb, T9, TY);
Chris@82 76 Te = T7 + Td;
Chris@82 77 T1S = Td - T7;
Chris@82 78 T10 = TX - TZ;
Chris@82 79 T1Q = TX + TZ;
Chris@82 80 }
Chris@82 81 }
/*
 * Elements 1, 4 and 7: twiddles W[0..1], W[6..7] and W[12..13]. Elements 4
 * and 7 are summed/differenced first, then merged with element 1.
 */
Chris@82 82 {
Chris@82 83 E Th, Tk, Ti, T12, Tx, T17, Tr, T15, Tg, Tj;
Chris@82 84 Th = cr[WS(rs, 1)];
Chris@82 85 Tk = ci[WS(rs, 1)];
Chris@82 86 Tg = W[0];
Chris@82 87 Ti = Tg * Th;
Chris@82 88 T12 = Tg * Tk;
Chris@82 89 {
Chris@82 90 E Tt, Tw, Tu, T16, Ts, Tv;
Chris@82 91 Tt = cr[WS(rs, 7)];
Chris@82 92 Tw = ci[WS(rs, 7)];
Chris@82 93 Ts = W[12];
Chris@82 94 Tu = Ts * Tt;
Chris@82 95 T16 = Ts * Tw;
Chris@82 96 Tv = W[13];
Chris@82 97 Tx = FMA(Tv, Tw, Tu);
Chris@82 98 T17 = FNMS(Tv, Tt, T16);
Chris@82 99 }
Chris@82 100 {
Chris@82 101 E Tn, Tq, To, T14, Tm, Tp;
Chris@82 102 Tn = cr[WS(rs, 4)];
Chris@82 103 Tq = ci[WS(rs, 4)];
Chris@82 104 Tm = W[6];
Chris@82 105 To = Tm * Tn;
Chris@82 106 T14 = Tm * Tq;
Chris@82 107 Tp = W[7];
Chris@82 108 Tr = FMA(Tp, Tq, To);
Chris@82 109 T15 = FNMS(Tp, Tn, T14);
Chris@82 110 }
Chris@82 111 T1a = Tr - Tx;
Chris@82 112 T1d = T15 - T17;
Chris@82 113 Ty = Tr + Tx;
Chris@82 114 T18 = T15 + T17;
Chris@82 115 Tj = W[1];
Chris@82 116 Tl = FMA(Tj, Tk, Ti);
Chris@82 117 T13 = FNMS(Tj, Th, T12);
Chris@82 118 T19 = FNMS(KP500000000, T18, T13);
Chris@82 119 T1c = FNMS(KP500000000, Ty, Tl);
Chris@82 120 }
/*
 * Elements 2, 5 and 8: twiddles W[2..3], W[8..9] and W[14..15]. Elements 5
 * and 8 are summed/differenced first, then merged with element 2.
 */
Chris@82 121 {
Chris@82 122 E TB, TE, TC, T1n, TR, T1k, TL, T1i, TA, TD;
Chris@82 123 TB = cr[WS(rs, 2)];
Chris@82 124 TE = ci[WS(rs, 2)];
Chris@82 125 TA = W[2];
Chris@82 126 TC = TA * TB;
Chris@82 127 T1n = TA * TE;
Chris@82 128 {
Chris@82 129 E TN, TQ, TO, T1j, TM, TP;
Chris@82 130 TN = cr[WS(rs, 8)];
Chris@82 131 TQ = ci[WS(rs, 8)];
Chris@82 132 TM = W[14];
Chris@82 133 TO = TM * TN;
Chris@82 134 T1j = TM * TQ;
Chris@82 135 TP = W[15];
Chris@82 136 TR = FMA(TP, TQ, TO);
Chris@82 137 T1k = FNMS(TP, TN, T1j);
Chris@82 138 }
Chris@82 139 {
Chris@82 140 E TH, TK, TI, T1h, TG, TJ;
Chris@82 141 TH = cr[WS(rs, 5)];
Chris@82 142 TK = ci[WS(rs, 5)];
Chris@82 143 TG = W[8];
Chris@82 144 TI = TG * TH;
Chris@82 145 T1h = TG * TK;
Chris@82 146 TJ = W[9];
Chris@82 147 TL = FMA(TJ, TK, TI);
Chris@82 148 T1i = FNMS(TJ, TH, T1h);
Chris@82 149 }
Chris@82 150 T1l = T1i - T1k;
Chris@82 151 T1r = TR - TL;
Chris@82 152 TS = TL + TR;
Chris@82 153 T1p = T1i + T1k;
Chris@82 154 TD = W[3];
Chris@82 155 TF = FMA(TD, TE, TC);
Chris@82 156 T1o = FNMS(TD, TB, T1n);
Chris@82 157 T1g = FNMS(KP500000000, TS, TF);
Chris@82 158 T1q = FNMS(KP500000000, T1p, T1o);
Chris@82 159 }
/*
 * First output group, built from the plain sums of the three input groups:
 * writes cr[0], cr[3], cr[6] and ci[2], ci[5], ci[8].
 */
Chris@82 160 {
Chris@82 161 E Tf, T21, TU, T24, T1O, T22, T1L, T23;
Chris@82 162 Tf = T1 + Te;
Chris@82 163 T21 = T1Q + T1P;
Chris@82 164 {
Chris@82 165 E Tz, TT, T1M, T1N;
Chris@82 166 Tz = Tl + Ty;
Chris@82 167 TT = TF + TS;
Chris@82 168 TU = Tz + TT;
Chris@82 169 T24 = TT - Tz;
Chris@82 170 T1M = T13 + T18;
Chris@82 171 T1N = T1o + T1p;
Chris@82 172 T1O = T1M - T1N;
Chris@82 173 T22 = T1M + T1N;
Chris@82 174 }
Chris@82 175 cr[0] = Tf + TU;
Chris@82 176 ci[WS(rs, 8)] = T22 + T21;
Chris@82 177 T1L = FNMS(KP500000000, TU, Tf);
Chris@82 178 ci[WS(rs, 2)] = FNMS(KP866025403, T1O, T1L);
Chris@82 179 cr[WS(rs, 3)] = FMA(KP866025403, T1O, T1L);
Chris@82 180 T23 = FNMS(KP500000000, T22, T21);
Chris@82 181 cr[WS(rs, 6)] = FMS(KP866025403, T24, T23);
Chris@82 182 ci[WS(rs, 5)] = FMA(KP866025403, T24, T23);
Chris@82 183 }
/*
 * Second output group, built from the rotated partial results: writes the
 * remaining twelve locations cr[1], cr[2], cr[4], cr[5], cr[7], cr[8] and
 * ci[0], ci[1], ci[3], ci[4], ci[6], ci[7].
 */
Chris@82 184 {
Chris@82 185 E T11, T1z, T1T, T1X, T1f, T1w, T1t, T1x, T1u, T1Y, T1C, T1I, T1F, T1J, T1G;
Chris@82 186 E T1U, TV, T1R;
Chris@82 187 TV = FNMS(KP500000000, Te, T1);
Chris@82 188 T11 = FNMS(KP866025403, T10, TV);
Chris@82 189 T1z = FMA(KP866025403, T10, TV);
Chris@82 190 T1R = FNMS(KP500000000, T1Q, T1P);
Chris@82 191 T1T = FMA(KP866025403, T1S, T1R);
Chris@82 192 T1X = FNMS(KP866025403, T1S, T1R);
Chris@82 193 {
Chris@82 194 E T1b, T1e, T1m, T1s;
Chris@82 195 T1b = FMA(KP866025403, T1a, T19);
Chris@82 196 T1e = FNMS(KP866025403, T1d, T1c);
Chris@82 197 T1f = FMA(KP176326980, T1e, T1b);
Chris@82 198 T1w = FNMS(KP176326980, T1b, T1e);
Chris@82 199 T1m = FNMS(KP866025403, T1l, T1g);
Chris@82 200 T1s = FNMS(KP866025403, T1r, T1q);
Chris@82 201 T1t = FNMS(KP363970234, T1s, T1m);
Chris@82 202 T1x = FMA(KP363970234, T1m, T1s);
Chris@82 203 }
Chris@82 204 T1u = FNMS(KP954188894, T1t, T1f);
Chris@82 205 T1Y = FMA(KP954188894, T1x, T1w);
Chris@82 206 {
Chris@82 207 E T1A, T1B, T1D, T1E;
Chris@82 208 T1A = FMA(KP866025403, T1r, T1q);
Chris@82 209 T1B = FMA(KP866025403, T1l, T1g);
Chris@82 210 T1C = FMA(KP176326980, T1B, T1A);
Chris@82 211 T1I = FNMS(KP176326980, T1A, T1B);
Chris@82 212 T1D = FMA(KP866025403, T1d, T1c);
Chris@82 213 T1E = FNMS(KP866025403, T1a, T19);
Chris@82 214 T1F = FMA(KP839099631, T1E, T1D);
Chris@82 215 T1J = FNMS(KP839099631, T1D, T1E);
Chris@82 216 }
Chris@82 217 T1G = FMA(KP777861913, T1F, T1C);
Chris@82 218 T1U = FNMS(KP777861913, T1J, T1I);
Chris@82 219 cr[WS(rs, 2)] = FMA(KP984807753, T1u, T11);
Chris@82 220 ci[WS(rs, 7)] = FNMS(KP984807753, T1U, T1T);
Chris@82 221 ci[WS(rs, 6)] = FNMS(KP984807753, T1Y, T1X);
Chris@82 222 cr[WS(rs, 1)] = FMA(KP984807753, T1G, T1z);
Chris@82 223 {
Chris@82 224 E T1V, T1W, T1H, T1K;
Chris@82 225 T1V = FMA(KP492403876, T1U, T1T);
Chris@82 226 T1W = FNMS(KP777861913, T1F, T1C);
Chris@82 227 cr[WS(rs, 7)] = FMS(KP852868531, T1W, T1V);
Chris@82 228 ci[WS(rs, 4)] = FMA(KP852868531, T1W, T1V);
Chris@82 229 T1H = FNMS(KP492403876, T1G, T1z);
Chris@82 230 T1K = FMA(KP777861913, T1J, T1I);
Chris@82 231 ci[WS(rs, 1)] = FNMS(KP852868531, T1K, T1H);
Chris@82 232 cr[WS(rs, 4)] = FMA(KP852868531, T1K, T1H);
Chris@82 233 }
Chris@82 234 {
Chris@82 235 E T1v, T1y, T1Z, T20;
Chris@82 236 T1v = FNMS(KP492403876, T1u, T11);
Chris@82 237 T1y = FNMS(KP954188894, T1x, T1w);
Chris@82 238 ci[WS(rs, 3)] = FNMS(KP852868531, T1y, T1v);
Chris@82 239 ci[0] = FMA(KP852868531, T1y, T1v);
Chris@82 240 T1Z = FMA(KP492403876, T1Y, T1X);
Chris@82 241 T20 = FMA(KP954188894, T1t, T1f);
Chris@82 242 cr[WS(rs, 5)] = FMS(KP852868531, T20, T1Z);
/* The only store that negates the whole fused expression explicitly. */
Chris@82 243 cr[WS(rs, 8)] = -(FMA(KP852868531, T20, T1Z));
Chris@82 244 }
Chris@82 245 }
Chris@82 246 }
Chris@82 247 }
Chris@82 248 }
Chris@82 249
/*
 * Twiddle-instruction table referenced by desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this
 * file. Presumably {TW_FULL, 1, 9} requests the full twiddle set for size 9
 * (hf_9 consumes 16 = 2 * (9 - 1) values of W per iteration) and TW_NEXT
 * terminates the list -- confirm against the twiddle definitions.
 */
Chris@82 250 static const tw_instr twinstr[] = {
Chris@82 251 {TW_FULL, 1, 9},
Chris@82 252 {TW_NEXT, 1, 0}
Chris@82 253 };
Chris@82 254
/*
 * Descriptor handed to the planner for this codelet: the value 9, the name
 * "hf_9", the twiddle table, the GENUS object and four counts. The counts
 * {24, 16, 72, 0} match the generator's header comment for this variant
 * (24 additions, 16 multiplications, 72 fused multiply/add).
 * NOTE(review): hc2hc_desc field names and the meaning of the fourth count
 * are not visible here -- confirm against the hc2hc_desc declaration.
 */
Chris@82 255 static const hc2hc_desc desc = { 9, "hf_9", twinstr, &GENUS, {24, 16, 72, 0} };
Chris@82 256
/*
 * Registration entry point: passes the hf_9 kernel and its descriptor to
 * khc2hc_register for planner p.
 * NOTE(review): X() is an external macro, presumably a symbol-name
 * mangler -- confirm.
 */
Chris@82 257 void X(codelet_hf_9) (planner *p) {
Chris@82 258 X(khc2hc_register) (p, hf_9, &desc);
Chris@82 259 }
Chris@82 260 #else
Chris@82 261
Chris@82 262 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 9 -dit -name hf_9 -include rdft/scalar/hf.h */
Chris@82 263
Chris@82 264 /*
Chris@82 265 * This function contains 96 FP additions, 72 FP multiplications,
Chris@82 266 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
Chris@82 267 * 41 stack variables, 8 constants, and 36 memory accesses
Chris@82 268 */
Chris@82 269 #include "rdft/scalar/hf.h"
Chris@82 270
/*
 * hf_9 (non-FMA variant): size-9 twiddle codelet emitted by genfft's
 * gen_hc2hc (see the "Generated by" line above: -n 9 -dit, without -fma).
 * This version is compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) the body
 *   1. loads cr[0], ci[0] and cr/ci[WS(rs, k)] for k = 1..8 (18 loads),
 *   2. combines each pair (cr[k], ci[k]), k = 1..8, with the twiddle pair
 *      (W[2k-2], W[2k-1]),
 *   3. runs a straight-line size-9 butterfly on the results, and
 *   4. stores 18 results back to cr/ci at indices 0..8 (in place).
 * Within one iteration all loads precede all stores.
 *
 * Parameters:
 *   cr, ci - data arrays, updated in place; cr is advanced by ms and ci is
 *            moved back by ms after every iteration.
 *   W      - twiddle table; rebased to W + (mb - 1) * 16 on entry and
 *            advanced by 16 values per iteration.
 *   rs     - stride handed to WS() to address the 9 elements.
 *   mb, me - half-open iteration range.
 *   ms     - per-iteration step applied to cr (+ms) and ci (-ms).
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE come from the included headers and are not visible
 * here. The comments below assume FMA(a, b, c) = a*b + c,
 * FNMS(a, b, c) = c - a*b and FMS(a, b, c) = a*b - c -- confirm against
 * the header definitions.
 */
Chris@82 271 static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 272 {
/*
 * Constants (names encode the leading digits). Numerically:
 *   KP500000000 = 1/2, KP866025403 ~ sqrt(3)/2,
 *   KP984807753 ~ cos(10 deg), KP173648177 ~ sin(10 deg),
 *   KP939692620 ~ cos(20 deg), KP342020143 ~ sin(20 deg),
 *   KP766044443 ~ cos(40 deg), KP642787609 ~ sin(40 deg).
 */
Chris@82 273 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@82 274 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@82 275 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@82 276 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@82 277 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 278 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@82 279 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 280 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 281 {
Chris@82 282 INT m;
/*
 * W is rebased once in the init clause; the increment clause steps m, cr,
 * ci and W together. NOTE(review): MAKE_VOLATILE_STRIDE is an external
 * macro whose effect is not visible here.
 */
Chris@82 283 for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@82 284 E T1, T1B, TQ, T1A, Tc, TN, T1C, T1D, TL, T1x, T19, T1o, T1c, T1n, Tu;
Chris@82 285 E T1w, TW, T1k, T11, T1l;
/*
 * Elements 0, 3 and 6: element 0 is used as loaded; elements 3 and 6 are
 * combined with twiddles W[4..5] and W[10..11], then summed/differenced.
 */
Chris@82 286 {
Chris@82 287 E T6, TO, Tb, TP;
Chris@82 288 T1 = cr[0];
Chris@82 289 T1B = ci[0];
Chris@82 290 {
Chris@82 291 E T3, T5, T2, T4;
Chris@82 292 T3 = cr[WS(rs, 3)];
Chris@82 293 T5 = ci[WS(rs, 3)];
Chris@82 294 T2 = W[4];
Chris@82 295 T4 = W[5];
Chris@82 296 T6 = FMA(T2, T3, T4 * T5);
Chris@82 297 TO = FNMS(T4, T3, T2 * T5);
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T8, Ta, T7, T9;
Chris@82 301 T8 = cr[WS(rs, 6)];
Chris@82 302 Ta = ci[WS(rs, 6)];
Chris@82 303 T7 = W[10];
Chris@82 304 T9 = W[11];
Chris@82 305 Tb = FMA(T7, T8, T9 * Ta);
Chris@82 306 TP = FNMS(T9, T8, T7 * Ta);
Chris@82 307 }
Chris@82 308 TQ = KP866025403 * (TO - TP);
Chris@82 309 T1A = KP866025403 * (Tb - T6);
Chris@82 310 Tc = T6 + Tb;
Chris@82 311 TN = FNMS(KP500000000, Tc, T1);
Chris@82 312 T1C = TO + TP;
Chris@82 313 T1D = FNMS(KP500000000, T1C, T1B);
Chris@82 314 }
/*
 * Elements 2, 5 and 8: twiddles W[2..3], W[8..9] and W[14..15], followed by
 * a 3-input combine producing TL, T1x and the rotated terms T19, T1o, T1c,
 * T1n.
 */
Chris@82 315 {
Chris@82 316 E Tz, T13, TE, T14, TJ, T15, TK, T16;
Chris@82 317 {
Chris@82 318 E Tw, Ty, Tv, Tx;
Chris@82 319 Tw = cr[WS(rs, 2)];
Chris@82 320 Ty = ci[WS(rs, 2)];
Chris@82 321 Tv = W[2];
Chris@82 322 Tx = W[3];
Chris@82 323 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 324 T13 = FNMS(Tx, Tw, Tv * Ty);
Chris@82 325 }
Chris@82 326 {
Chris@82 327 E TB, TD, TA, TC;
Chris@82 328 TB = cr[WS(rs, 5)];
Chris@82 329 TD = ci[WS(rs, 5)];
Chris@82 330 TA = W[8];
Chris@82 331 TC = W[9];
Chris@82 332 TE = FMA(TA, TB, TC * TD);
Chris@82 333 T14 = FNMS(TC, TB, TA * TD);
Chris@82 334 }
Chris@82 335 {
Chris@82 336 E TG, TI, TF, TH;
Chris@82 337 TG = cr[WS(rs, 8)];
Chris@82 338 TI = ci[WS(rs, 8)];
Chris@82 339 TF = W[14];
Chris@82 340 TH = W[15];
Chris@82 341 TJ = FMA(TF, TG, TH * TI);
Chris@82 342 T15 = FNMS(TH, TG, TF * TI);
Chris@82 343 }
Chris@82 344 TK = TE + TJ;
Chris@82 345 T16 = T14 + T15;
Chris@82 346 TL = Tz + TK;
Chris@82 347 T1x = T13 + T16;
Chris@82 348 {
Chris@82 349 E T17, T18, T1a, T1b;
Chris@82 350 T17 = FNMS(KP500000000, T16, T13);
Chris@82 351 T18 = KP866025403 * (TJ - TE);
Chris@82 352 T19 = T17 - T18;
Chris@82 353 T1o = T18 + T17;
Chris@82 354 T1a = FNMS(KP500000000, TK, Tz);
Chris@82 355 T1b = KP866025403 * (T14 - T15);
Chris@82 356 T1c = T1a - T1b;
Chris@82 357 T1n = T1a + T1b;
Chris@82 358 }
Chris@82 359 }
/*
 * Elements 1, 4 and 7: twiddles W[0..1], W[6..7] and W[12..13], followed by
 * a 3-input combine producing Tu, T1w and the rotated terms TW, T1k, T11,
 * T1l.
 */
Chris@82 360 {
Chris@82 361 E Ti, TX, Tn, TT, Ts, TU, Tt, TY;
Chris@82 362 {
Chris@82 363 E Tf, Th, Te, Tg;
Chris@82 364 Tf = cr[WS(rs, 1)];
Chris@82 365 Th = ci[WS(rs, 1)];
Chris@82 366 Te = W[0];
Chris@82 367 Tg = W[1];
Chris@82 368 Ti = FMA(Te, Tf, Tg * Th);
Chris@82 369 TX = FNMS(Tg, Tf, Te * Th);
Chris@82 370 }
Chris@82 371 {
Chris@82 372 E Tk, Tm, Tj, Tl;
Chris@82 373 Tk = cr[WS(rs, 4)];
Chris@82 374 Tm = ci[WS(rs, 4)];
Chris@82 375 Tj = W[6];
Chris@82 376 Tl = W[7];
Chris@82 377 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 378 TT = FNMS(Tl, Tk, Tj * Tm);
Chris@82 379 }
Chris@82 380 {
Chris@82 381 E Tp, Tr, To, Tq;
Chris@82 382 Tp = cr[WS(rs, 7)];
Chris@82 383 Tr = ci[WS(rs, 7)];
Chris@82 384 To = W[12];
Chris@82 385 Tq = W[13];
Chris@82 386 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 387 TU = FNMS(Tq, Tp, To * Tr);
Chris@82 388 }
Chris@82 389 Tt = Tn + Ts;
Chris@82 390 TY = TT + TU;
Chris@82 391 Tu = Ti + Tt;
Chris@82 392 T1w = TX + TY;
Chris@82 393 {
Chris@82 394 E TS, TV, TZ, T10;
Chris@82 395 TS = FNMS(KP500000000, Tt, Ti);
Chris@82 396 TV = KP866025403 * (TT - TU);
Chris@82 397 TW = TS - TV;
Chris@82 398 T1k = TS + TV;
Chris@82 399 TZ = FNMS(KP500000000, TY, TX);
Chris@82 400 T10 = KP866025403 * (Ts - Tn);
Chris@82 401 T11 = TZ - T10;
Chris@82 402 T1l = T10 + TZ;
Chris@82 403 }
Chris@82 404 }
/* Outputs cr[0], cr[3], ci[2]: built from the plain group sums. */
Chris@82 405 {
Chris@82 406 E T1y, Td, TM, T1v;
Chris@82 407 T1y = KP866025403 * (T1w - T1x);
Chris@82 408 Td = T1 + Tc;
Chris@82 409 TM = Tu + TL;
Chris@82 410 T1v = FNMS(KP500000000, TM, Td);
Chris@82 411 cr[0] = Td + TM;
Chris@82 412 cr[WS(rs, 3)] = T1v + T1y;
Chris@82 413 ci[WS(rs, 2)] = T1v - T1y;
Chris@82 414 }
/* Outputs cr[2], cr[5], cr[8], ci[0], ci[3], ci[6]. */
Chris@82 415 {
Chris@82 416 E TR, T1I, T1e, T1K, T1i, T1H, T1f, T1J;
Chris@82 417 TR = TN - TQ;
Chris@82 418 T1I = T1D - T1A;
Chris@82 419 {
Chris@82 420 E T12, T1d, T1g, T1h;
Chris@82 421 T12 = FMA(KP173648177, TW, KP984807753 * T11);
Chris@82 422 T1d = FNMS(KP939692620, T1c, KP342020143 * T19);
Chris@82 423 T1e = T12 + T1d;
Chris@82 424 T1K = KP866025403 * (T1d - T12);
Chris@82 425 T1g = FNMS(KP984807753, TW, KP173648177 * T11);
Chris@82 426 T1h = FMA(KP342020143, T1c, KP939692620 * T19);
Chris@82 427 T1i = KP866025403 * (T1g + T1h);
Chris@82 428 T1H = T1g - T1h;
Chris@82 429 }
Chris@82 430 cr[WS(rs, 2)] = TR + T1e;
Chris@82 431 ci[WS(rs, 6)] = T1H + T1I;
Chris@82 432 T1f = FNMS(KP500000000, T1e, TR);
Chris@82 433 ci[0] = T1f - T1i;
Chris@82 434 ci[WS(rs, 3)] = T1f + T1i;
Chris@82 435 T1J = FMS(KP500000000, T1H, T1I);
Chris@82 436 cr[WS(rs, 5)] = T1J - T1K;
Chris@82 437 cr[WS(rs, 8)] = T1K + T1J;
Chris@82 438 }
/* Outputs cr[6], ci[5], ci[8]: built from the plain group sums. */
Chris@82 439 {
Chris@82 440 E T1L, T1M, T1N, T1O;
Chris@82 441 T1L = KP866025403 * (TL - Tu);
Chris@82 442 T1M = T1C + T1B;
Chris@82 443 T1N = T1w + T1x;
Chris@82 444 T1O = FNMS(KP500000000, T1N, T1M);
Chris@82 445 cr[WS(rs, 6)] = T1L - T1O;
Chris@82 446 ci[WS(rs, 8)] = T1N + T1M;
Chris@82 447 ci[WS(rs, 5)] = T1L + T1O;
Chris@82 448 }
/* Outputs cr[1], cr[4], cr[7], ci[1], ci[4], ci[7]. */
Chris@82 449 {
Chris@82 450 E T1j, T1E, T1q, T1z, T1u, T1F, T1r, T1G;
Chris@82 451 T1j = TN + TQ;
Chris@82 452 T1E = T1A + T1D;
Chris@82 453 {
Chris@82 454 E T1m, T1p, T1s, T1t;
Chris@82 455 T1m = FMA(KP766044443, T1k, KP642787609 * T1l);
Chris@82 456 T1p = FMA(KP173648177, T1n, KP984807753 * T1o);
Chris@82 457 T1q = T1m + T1p;
Chris@82 458 T1z = KP866025403 * (T1p - T1m);
Chris@82 459 T1s = FNMS(KP642787609, T1k, KP766044443 * T1l);
Chris@82 460 T1t = FNMS(KP984807753, T1n, KP173648177 * T1o);
Chris@82 461 T1u = KP866025403 * (T1s - T1t);
Chris@82 462 T1F = T1s + T1t;
Chris@82 463 }
Chris@82 464 cr[WS(rs, 1)] = T1j + T1q;
Chris@82 465 T1r = FNMS(KP500000000, T1q, T1j);
Chris@82 466 ci[WS(rs, 1)] = T1r - T1u;
Chris@82 467 cr[WS(rs, 4)] = T1r + T1u;
Chris@82 468 ci[WS(rs, 7)] = T1F + T1E;
Chris@82 469 T1G = FNMS(KP500000000, T1F, T1E);
Chris@82 470 cr[WS(rs, 7)] = T1z - T1G;
Chris@82 471 ci[WS(rs, 4)] = T1z + T1G;
Chris@82 472 }
Chris@82 473 }
Chris@82 474 }
Chris@82 475 }
Chris@82 476
/*
 * Twiddle-instruction table referenced by desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this
 * file. Presumably {TW_FULL, 1, 9} requests the full twiddle set for size 9
 * (hf_9 consumes 16 = 2 * (9 - 1) values of W per iteration) and TW_NEXT
 * terminates the list -- confirm against the twiddle definitions.
 */
Chris@82 477 static const tw_instr twinstr[] = {
Chris@82 478 {TW_FULL, 1, 9},
Chris@82 479 {TW_NEXT, 1, 0}
Chris@82 480 };
Chris@82 481
/*
 * Descriptor handed to the planner for this codelet: the value 9, the name
 * "hf_9", the twiddle table, the GENUS object and four counts. The counts
 * {60, 36, 36, 0} match the generator's header comment for this variant
 * (60 additions, 36 multiplications, 36 fused multiply/add).
 * NOTE(review): hc2hc_desc field names and the meaning of the fourth count
 * are not visible here -- confirm against the hc2hc_desc declaration.
 */
Chris@82 482 static const hc2hc_desc desc = { 9, "hf_9", twinstr, &GENUS, {60, 36, 36, 0} };
Chris@82 483
/*
 * Registration entry point: passes the hf_9 kernel and its descriptor to
 * khc2hc_register for planner p.
 * NOTE(review): X() is an external macro, presumably a symbol-name
 * mangler -- confirm.
 */
Chris@82 484 void X(codelet_hf_9) (planner *p) {
Chris@82 485 X(khc2hc_register) (p, hf_9, &desc);
Chris@82 486 }
Chris@82 487 #endif