annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:32 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hb_12 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 118 FP additions, 68 FP multiplications,
Chris@82 32 * (or, 72 additions, 22 multiplications, 46 fused multiply/add),
Chris@82 33 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb_12, FMA-oriented variant (this branch is compiled when ARCH_PREFERS_FMA
 * or ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated size-12 twiddle codelet (gen_hc2hc, -sign 1 -n 12 -dif).
 * For each m in [mb, me) it reads the 12 values cr[WS(rs, 0..11)] and the 12
 * values ci[WS(rs, 0..11)], combines them, and stores 12 + 12 results back to
 * the same locations, i.e. the computation is in place.
 *
 * cr, ci - data pointers; cr advances by ms and ci retreats by ms after each
 *          iteration.
 * W      - twiddle table; 22 entries (W[0..21]) are consumed per iteration and
 *          the pointer is pre-offset by (mb - 1) * 22 before the first one.
 * rs     - stride handed to WS() to address the 12 elements of one iteration.
 * mb, me - half-open iteration range.
 * ms     - per-iteration pointer step for cr (+ms) and ci (-ms).
 *
 * NOTE(review): the comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b; those macros are defined outside this file,
 * so confirm against the codelet headers.
 */
Chris@82 37 static void hb_12(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* KP866025403 matches sqrt(3)/2 to the digits given; KP500000000 is 1/2. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
/* All per-iteration pointer updates live in the loop header's comma expression.
 * MAKE_VOLATILE_STRIDE is an external macro; its effect is not visible here. */
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) {
Chris@82 44 E T18, T20, T1b, T21, T1s, T2a, T1p, T29, TI, TN, TO, Tb, To, T1f, T23;
Chris@82 45 E T1i, T24, T1z, T2d, T1w, T2c, Tt, Ty, Tz, Tm, TD;
/* Load phase, part 1: reads the even-indexed cr entries (0, 2, 4, 6, 8, 10)
 * and the odd-indexed ci entries (1, 3, 5, 7, 9, 11) and forms the first
 * set of intermediate sums and differences. */
Chris@82 46 {
Chris@82 47 E T1, TE, TM, T6, T4, T1o, TH, T17, TL, T1a, T9, T1r;
Chris@82 48 T1 = cr[0];
Chris@82 49 TE = ci[WS(rs, 11)];
Chris@82 50 TM = cr[WS(rs, 6)];
Chris@82 51 T6 = ci[WS(rs, 5)];
Chris@82 52 {
Chris@82 53 E T2, T3, TF, TG;
Chris@82 54 T2 = cr[WS(rs, 4)];
Chris@82 55 T3 = ci[WS(rs, 3)];
Chris@82 56 T4 = T2 + T3;
Chris@82 57 T1o = T2 - T3;
Chris@82 58 TF = ci[WS(rs, 7)];
Chris@82 59 TG = cr[WS(rs, 8)];
Chris@82 60 TH = TF - TG;
Chris@82 61 T17 = TF + TG;
Chris@82 62 }
Chris@82 63 {
Chris@82 64 E TJ, TK, T7, T8;
Chris@82 65 TJ = ci[WS(rs, 9)];
Chris@82 66 TK = cr[WS(rs, 10)];
Chris@82 67 TL = TJ - TK;
Chris@82 68 T1a = TJ + TK;
Chris@82 69 T7 = ci[WS(rs, 1)];
Chris@82 70 T8 = cr[WS(rs, 2)];
Chris@82 71 T9 = T7 + T8;
Chris@82 72 T1r = T7 - T8;
Chris@82 73 }
Chris@82 74 {
Chris@82 75 E T16, T19, T1q, T1n, T5, Ta;
Chris@82 76 T16 = FNMS(KP500000000, T4, T1);
Chris@82 77 T18 = FNMS(KP866025403, T17, T16);
Chris@82 78 T20 = FMA(KP866025403, T17, T16);
Chris@82 79 T19 = FNMS(KP500000000, T9, T6);
Chris@82 80 T1b = FMA(KP866025403, T1a, T19);
Chris@82 81 T21 = FNMS(KP866025403, T1a, T19);
Chris@82 82 T1q = FMA(KP500000000, TL, TM);
Chris@82 83 T1s = FNMS(KP866025403, T1r, T1q);
Chris@82 84 T2a = FMA(KP866025403, T1r, T1q);
Chris@82 85 T1n = FNMS(KP500000000, TH, TE);
Chris@82 86 T1p = FMA(KP866025403, T1o, T1n);
Chris@82 87 T29 = FNMS(KP866025403, T1o, T1n);
Chris@82 88 TI = TE + TH;
Chris@82 89 TN = TL - TM;
Chris@82 90 TO = TI - TN;
Chris@82 91 T5 = T1 + T4;
Chris@82 92 Ta = T6 + T9;
Chris@82 93 Tb = T5 + Ta;
Chris@82 94 To = T5 - Ta;
Chris@82 95 }
Chris@82 96 }
/* Load phase, part 2: reads the odd-indexed cr entries (1, 3, 5, 7, 9, 11)
 * and the even-indexed ci entries (0, 2, 4, 6, 8, 10). */
Chris@82 97 {
Chris@82 98 E Tc, Tp, Tx, Th, Tf, T1v, Ts, T1e, Tw, T1h, Tk, T1y;
Chris@82 99 Tc = cr[WS(rs, 3)];
Chris@82 100 Tp = ci[WS(rs, 8)];
Chris@82 101 Tx = cr[WS(rs, 9)];
Chris@82 102 Th = ci[WS(rs, 2)];
Chris@82 103 {
Chris@82 104 E Td, Te, Tq, Tr;
Chris@82 105 Td = ci[WS(rs, 4)];
Chris@82 106 Te = ci[0];
Chris@82 107 Tf = Td + Te;
Chris@82 108 T1v = Td - Te;
Chris@82 109 Tq = cr[WS(rs, 7)];
Chris@82 110 Tr = cr[WS(rs, 11)];
Chris@82 111 Ts = Tq + Tr;
Chris@82 112 T1e = Tq - Tr;
Chris@82 113 }
Chris@82 114 {
Chris@82 115 E Tu, Tv, Ti, Tj;
Chris@82 116 Tu = ci[WS(rs, 10)];
Chris@82 117 Tv = ci[WS(rs, 6)];
Chris@82 118 Tw = Tu + Tv;
Chris@82 119 T1h = Tv - Tu;
Chris@82 120 Ti = cr[WS(rs, 1)];
Chris@82 121 Tj = cr[WS(rs, 5)];
Chris@82 122 Tk = Ti + Tj;
Chris@82 123 T1y = Ti - Tj;
Chris@82 124 }
Chris@82 125 {
Chris@82 126 E T1d, T1g, T1x, T1u, Tg, Tl;
Chris@82 127 T1d = FNMS(KP500000000, Tf, Tc);
Chris@82 128 T1f = FMA(KP866025403, T1e, T1d);
Chris@82 129 T23 = FNMS(KP866025403, T1e, T1d);
Chris@82 130 T1g = FNMS(KP500000000, Tk, Th);
Chris@82 131 T1i = FMA(KP866025403, T1h, T1g);
Chris@82 132 T24 = FNMS(KP866025403, T1h, T1g);
Chris@82 133 T1x = FMA(KP500000000, Tw, Tx);
Chris@82 134 T1z = FNMS(KP866025403, T1y, T1x);
Chris@82 135 T2d = FMA(KP866025403, T1y, T1x);
Chris@82 136 T1u = FMA(KP500000000, Ts, Tp);
Chris@82 137 T1w = FMA(KP866025403, T1v, T1u);
Chris@82 138 T2c = FNMS(KP866025403, T1v, T1u);
Chris@82 139 Tt = Tp - Ts;
Chris@82 140 Ty = Tw - Tx;
Chris@82 141 Tz = Tt - Ty;
Chris@82 142 Tg = Tc + Tf;
Chris@82 143 Tl = Th + Tk;
Chris@82 144 Tm = Tg + Tl;
Chris@82 145 TD = Tg - Tl;
Chris@82 146 }
Chris@82 147 }
/* All 24 inputs are now held in temporaries. From here on cr/ci are only
 * written, never read, so the in-place stores cannot clobber a pending input.
 * Element 0 is stored without a twiddle multiply; every other element k is
 * combined with the twiddle pair (W[2k - 2], W[2k - 1]). */
Chris@82 148 cr[0] = Tb + Tm;
/* Output element 9, twiddles W[16], W[17]. */
Chris@82 149 {
Chris@82 150 E TA, TP, TB, TQ, Tn, TC;
Chris@82 151 TA = To - Tz;
Chris@82 152 TP = TD + TO;
Chris@82 153 Tn = W[16];
Chris@82 154 TB = Tn * TA;
Chris@82 155 TQ = Tn * TP;
Chris@82 156 TC = W[17];
Chris@82 157 cr[WS(rs, 9)] = FNMS(TC, TP, TB);
Chris@82 158 ci[WS(rs, 9)] = FMA(TC, TA, TQ);
Chris@82 159 }
/* Output element 3, twiddles W[4], W[5]. */
Chris@82 160 {
Chris@82 161 E TS, TV, TT, TW, TR, TU;
Chris@82 162 TS = To + Tz;
Chris@82 163 TV = TO - TD;
Chris@82 164 TR = W[4];
Chris@82 165 TT = TR * TS;
Chris@82 166 TW = TR * TV;
Chris@82 167 TU = W[5];
Chris@82 168 cr[WS(rs, 3)] = FNMS(TU, TV, TT);
Chris@82 169 ci[WS(rs, 3)] = FMA(TU, TS, TW);
Chris@82 170 }
/* ci[0] (no twiddle) and output element 6, twiddles W[10], W[11]. */
Chris@82 171 {
Chris@82 172 E T11, T12, T13, TX, TZ, T10, T14, TY;
Chris@82 173 T11 = TI + TN;
Chris@82 174 T12 = Tt + Ty;
Chris@82 175 T13 = T11 - T12;
Chris@82 176 TY = Tb - Tm;
Chris@82 177 TX = W[10];
Chris@82 178 TZ = TX * TY;
Chris@82 179 T10 = W[11];
Chris@82 180 T14 = T10 * TY;
Chris@82 181 ci[0] = T11 + T12;
Chris@82 182 ci[WS(rs, 6)] = FMA(TX, T13, T14);
Chris@82 183 cr[WS(rs, 6)] = FNMS(T10, T13, TZ);
Chris@82 184 }
/* Output elements 10 (W[18], W[19]) and 4 (W[6], W[7]). */
Chris@82 185 {
Chris@82 186 E T1k, T1E, T1B, T1H;
Chris@82 187 {
Chris@82 188 E T1c, T1j, T1t, T1A;
Chris@82 189 T1c = T18 + T1b;
Chris@82 190 T1j = T1f + T1i;
Chris@82 191 T1k = T1c - T1j;
Chris@82 192 T1E = T1c + T1j;
Chris@82 193 T1t = T1p - T1s;
Chris@82 194 T1A = T1w - T1z;
Chris@82 195 T1B = T1t - T1A;
Chris@82 196 T1H = T1t + T1A;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T15, T1l, T1m, T1C;
Chris@82 200 T15 = W[18];
Chris@82 201 T1l = T15 * T1k;
Chris@82 202 T1m = W[19];
Chris@82 203 T1C = T1m * T1k;
Chris@82 204 cr[WS(rs, 10)] = FNMS(T1m, T1B, T1l);
Chris@82 205 ci[WS(rs, 10)] = FMA(T15, T1B, T1C);
Chris@82 206 }
Chris@82 207 {
Chris@82 208 E T1D, T1F, T1G, T1I;
Chris@82 209 T1D = W[6];
Chris@82 210 T1F = T1D * T1E;
Chris@82 211 T1G = W[7];
Chris@82 212 T1I = T1G * T1E;
Chris@82 213 cr[WS(rs, 4)] = FNMS(T1G, T1H, T1F);
Chris@82 214 ci[WS(rs, 4)] = FMA(T1D, T1H, T1I);
Chris@82 215 }
Chris@82 216 }
/* Output elements 2 (W[2], W[3]) and 8 (W[14], W[15]). */
Chris@82 217 {
Chris@82 218 E T26, T2i, T2f, T2l;
Chris@82 219 {
Chris@82 220 E T22, T25, T2b, T2e;
Chris@82 221 T22 = T20 + T21;
Chris@82 222 T25 = T23 + T24;
Chris@82 223 T26 = T22 - T25;
Chris@82 224 T2i = T22 + T25;
Chris@82 225 T2b = T29 - T2a;
Chris@82 226 T2e = T2c - T2d;
Chris@82 227 T2f = T2b - T2e;
Chris@82 228 T2l = T2b + T2e;
Chris@82 229 }
Chris@82 230 {
Chris@82 231 E T1Z, T27, T28, T2g;
Chris@82 232 T1Z = W[2];
Chris@82 233 T27 = T1Z * T26;
Chris@82 234 T28 = W[3];
Chris@82 235 T2g = T28 * T26;
Chris@82 236 cr[WS(rs, 2)] = FNMS(T28, T2f, T27);
Chris@82 237 ci[WS(rs, 2)] = FMA(T1Z, T2f, T2g);
Chris@82 238 }
Chris@82 239 {
Chris@82 240 E T2h, T2j, T2k, T2m;
Chris@82 241 T2h = W[14];
Chris@82 242 T2j = T2h * T2i;
Chris@82 243 T2k = W[15];
Chris@82 244 T2m = T2k * T2i;
Chris@82 245 cr[WS(rs, 8)] = FNMS(T2k, T2l, T2j);
Chris@82 246 ci[WS(rs, 8)] = FMA(T2h, T2l, T2m);
Chris@82 247 }
Chris@82 248 }
/* Output elements 5 (W[8], W[9]) and 11 (W[20], W[21]). */
Chris@82 249 {
Chris@82 250 E T2q, T2y, T2v, T2B;
Chris@82 251 {
Chris@82 252 E T2o, T2p, T2t, T2u;
Chris@82 253 T2o = T20 - T21;
Chris@82 254 T2p = T2c + T2d;
Chris@82 255 T2q = T2o - T2p;
Chris@82 256 T2y = T2o + T2p;
Chris@82 257 T2t = T29 + T2a;
Chris@82 258 T2u = T23 - T24;
Chris@82 259 T2v = T2t + T2u;
Chris@82 260 T2B = T2t - T2u;
Chris@82 261 }
Chris@82 262 {
Chris@82 263 E T2r, T2w, T2n, T2s;
Chris@82 264 T2n = W[8];
Chris@82 265 T2r = T2n * T2q;
Chris@82 266 T2w = T2n * T2v;
Chris@82 267 T2s = W[9];
Chris@82 268 cr[WS(rs, 5)] = FNMS(T2s, T2v, T2r);
Chris@82 269 ci[WS(rs, 5)] = FMA(T2s, T2q, T2w);
Chris@82 270 }
Chris@82 271 {
Chris@82 272 E T2z, T2C, T2x, T2A;
Chris@82 273 T2x = W[20];
Chris@82 274 T2z = T2x * T2y;
Chris@82 275 T2C = T2x * T2B;
Chris@82 276 T2A = W[21];
Chris@82 277 cr[WS(rs, 11)] = FNMS(T2A, T2B, T2z);
Chris@82 278 ci[WS(rs, 11)] = FMA(T2A, T2y, T2C);
Chris@82 279 }
Chris@82 280 }
/* Output elements 1 (W[0], W[1]) and 7 (W[12], W[13]). */
Chris@82 281 {
Chris@82 282 E T1M, T1U, T1R, T1X;
Chris@82 283 {
Chris@82 284 E T1K, T1L, T1P, T1Q;
Chris@82 285 T1K = T18 - T1b;
Chris@82 286 T1L = T1w + T1z;
Chris@82 287 T1M = T1K - T1L;
Chris@82 288 T1U = T1K + T1L;
Chris@82 289 T1P = T1p + T1s;
Chris@82 290 T1Q = T1f - T1i;
Chris@82 291 T1R = T1P + T1Q;
Chris@82 292 T1X = T1P - T1Q;
Chris@82 293 }
Chris@82 294 {
Chris@82 295 E T1N, T1S, T1J, T1O;
Chris@82 296 T1J = W[0];
Chris@82 297 T1N = T1J * T1M;
Chris@82 298 T1S = T1J * T1R;
Chris@82 299 T1O = W[1];
Chris@82 300 cr[WS(rs, 1)] = FNMS(T1O, T1R, T1N);
Chris@82 301 ci[WS(rs, 1)] = FMA(T1O, T1M, T1S);
Chris@82 302 }
Chris@82 303 {
Chris@82 304 E T1V, T1Y, T1T, T1W;
Chris@82 305 T1T = W[12];
Chris@82 306 T1V = T1T * T1U;
Chris@82 307 T1Y = T1T * T1X;
Chris@82 308 T1W = W[13];
Chris@82 309 cr[WS(rs, 7)] = FNMS(T1W, T1X, T1V);
Chris@82 310 ci[WS(rs, 7)] = FMA(T1W, T1U, T1Y);
Chris@82 311 }
Chris@82 312 }
Chris@82 313 }
Chris@82 314 }
Chris@82 315 }
Chris@82 316
/*
 * Twiddle instruction list referenced by the codelet descriptor below.
 * NOTE(review): presumably {TW_FULL, 1, 12} requests the full twiddle set for
 * size 12 (consistent with the 22 W entries hb_12 consumes per iteration) and
 * {TW_NEXT, 1, 0} ends the list; confirm against the tw_instr definition,
 * which is not part of this file.
 */
Chris@82 317 static const tw_instr twinstr[] = {
Chris@82 318 {TW_FULL, 1, 12},
Chris@82 319 {TW_NEXT, 1, 0}
Chris@82 320 };
Chris@82 321
/*
 * Descriptor for the FMA variant: size 12, name "hb_12", the twiddle
 * instruction list, and &GENUS. The counts {72, 22, 46, 0} repeat the figures
 * in the generated header comment (72 additions, 22 multiplications, 46 fused
 * multiply/add); the meaning of the fourth count is not visible in this file.
 */
Chris@82 322 static const hc2hc_desc desc = { 12, "hb_12", twinstr, &GENUS, {72, 22, 46, 0} };
Chris@82 323
/*
 * Registration entry point: hands the hb_12 kernel and its descriptor to
 * X(khc2hc_register) for planner p. X() is a macro defined elsewhere
 * (presumably a symbol-prefixing wrapper; confirm in the FFTW headers).
 */
Chris@82 324 void X(codelet_hb_12) (planner *p) {
Chris@82 325 X(khc2hc_register) (p, hb_12, &desc);
Chris@82 326 }
Chris@82 327 #else
Chris@82 328
Chris@82 329 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -dif -name hb_12 -include rdft/scalar/hb.h */
Chris@82 330
Chris@82 331 /*
Chris@82 332 * This function contains 118 FP additions, 60 FP multiplications,
Chris@82 333 * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 334 * 39 stack variables, 2 constants, and 48 memory accesses
Chris@82 335 */
Chris@82 336 #include "rdft/scalar/hb.h"
Chris@82 337
/*
 * hb_12, non-FMA variant (this branch is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated size-12 twiddle codelet (gen_hc2hc, -sign 1 -n 12 -dif).
 * For each m in [mb, me) it reads the 12 values cr[WS(rs, 0..11)] and the 12
 * values ci[WS(rs, 0..11)], combines them, and stores 12 + 12 results back to
 * the same locations, i.e. the computation is in place.
 *
 * cr, ci - data pointers; cr advances by ms and ci retreats by ms after each
 *          iteration.
 * W      - twiddle table; 22 entries (W[0..21]) are consumed per iteration and
 *          the pointer is pre-offset by (mb - 1) * 22 before the first one.
 * rs     - stride handed to WS() to address the 12 elements of one iteration.
 * mb, me - half-open iteration range.
 * ms     - per-iteration pointer step for cr (+ms) and ci (-ms).
 *
 * NOTE(review): the comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b; those macros are defined outside this file,
 * so confirm against the codelet headers.
 */
Chris@82 338 static void hb_12(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 339 {
/* KP500000000 is 1/2; KP866025403 matches sqrt(3)/2 to the digits given. */
Chris@82 340 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 341 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 342 {
Chris@82 343 INT m;
/* All per-iteration pointer updates live in the loop header's comma expression.
 * MAKE_VOLATILE_STRIDE is an external macro; its effect is not visible here. */
Chris@82 344 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) {
Chris@82 345 E T5, TH, T12, T1M, T1i, T1U, Tg, Tt, T19, T1X, T1p, T1P, Ta, TM, T15;
Chris@82 346 E T1N, T1l, T1V, Tl, Ty, T1c, T1Y, T1s, T1Q;
/* Load group 1: cr[0], cr[4], cr[8] and ci[3], ci[7], ci[11]. */
Chris@82 347 {
Chris@82 348 E T1, TD, T4, T1g, TG, T11, T10, T1h;
Chris@82 349 T1 = cr[0];
Chris@82 350 TD = ci[WS(rs, 11)];
Chris@82 351 {
Chris@82 352 E T2, T3, TE, TF;
Chris@82 353 T2 = cr[WS(rs, 4)];
Chris@82 354 T3 = ci[WS(rs, 3)];
Chris@82 355 T4 = T2 + T3;
Chris@82 356 T1g = KP866025403 * (T2 - T3);
Chris@82 357 TE = ci[WS(rs, 7)];
Chris@82 358 TF = cr[WS(rs, 8)];
Chris@82 359 TG = TE - TF;
Chris@82 360 T11 = KP866025403 * (TE + TF);
Chris@82 361 }
Chris@82 362 T5 = T1 + T4;
Chris@82 363 TH = TD + TG;
Chris@82 364 T10 = FNMS(KP500000000, T4, T1);
Chris@82 365 T12 = T10 - T11;
Chris@82 366 T1M = T10 + T11;
Chris@82 367 T1h = FNMS(KP500000000, TG, TD);
Chris@82 368 T1i = T1g + T1h;
Chris@82 369 T1U = T1h - T1g;
Chris@82 370 }
/* Load group 2: cr[3], cr[7], cr[11] and ci[0], ci[4], ci[8]. */
Chris@82 371 {
Chris@82 372 E Tc, Tp, Tf, T17, Ts, T1o, T18, T1n;
Chris@82 373 Tc = cr[WS(rs, 3)];
Chris@82 374 Tp = ci[WS(rs, 8)];
Chris@82 375 {
Chris@82 376 E Td, Te, Tq, Tr;
Chris@82 377 Td = ci[WS(rs, 4)];
Chris@82 378 Te = ci[0];
Chris@82 379 Tf = Td + Te;
Chris@82 380 T17 = KP866025403 * (Td - Te);
Chris@82 381 Tq = cr[WS(rs, 7)];
Chris@82 382 Tr = cr[WS(rs, 11)];
Chris@82 383 Ts = Tq + Tr;
Chris@82 384 T1o = KP866025403 * (Tq - Tr);
Chris@82 385 }
Chris@82 386 Tg = Tc + Tf;
Chris@82 387 Tt = Tp - Ts;
Chris@82 388 T18 = FMA(KP500000000, Ts, Tp);
Chris@82 389 T19 = T17 + T18;
Chris@82 390 T1X = T18 - T17;
Chris@82 391 T1n = FNMS(KP500000000, Tf, Tc);
Chris@82 392 T1p = T1n + T1o;
Chris@82 393 T1P = T1n - T1o;
Chris@82 394 }
/* Load group 3: cr[2], cr[6], cr[10] and ci[1], ci[5], ci[9]. */
Chris@82 395 {
Chris@82 396 E T6, TL, T9, T1j, TK, T14, T13, T1k;
Chris@82 397 T6 = ci[WS(rs, 5)];
Chris@82 398 TL = cr[WS(rs, 6)];
Chris@82 399 {
Chris@82 400 E T7, T8, TI, TJ;
Chris@82 401 T7 = ci[WS(rs, 1)];
Chris@82 402 T8 = cr[WS(rs, 2)];
Chris@82 403 T9 = T7 + T8;
Chris@82 404 T1j = KP866025403 * (T7 - T8);
Chris@82 405 TI = ci[WS(rs, 9)];
Chris@82 406 TJ = cr[WS(rs, 10)];
Chris@82 407 TK = TI - TJ;
Chris@82 408 T14 = KP866025403 * (TI + TJ);
Chris@82 409 }
Chris@82 410 Ta = T6 + T9;
Chris@82 411 TM = TK - TL;
Chris@82 412 T13 = FNMS(KP500000000, T9, T6);
Chris@82 413 T15 = T13 + T14;
Chris@82 414 T1N = T13 - T14;
Chris@82 415 T1k = FMA(KP500000000, TK, TL);
Chris@82 416 T1l = T1j - T1k;
Chris@82 417 T1V = T1j + T1k;
Chris@82 418 }
/* Load group 4: cr[1], cr[5], cr[9] and ci[2], ci[6], ci[10]. */
Chris@82 419 {
Chris@82 420 E Th, Tx, Tk, T1a, Tw, T1r, T1b, T1q;
Chris@82 421 Th = ci[WS(rs, 2)];
Chris@82 422 Tx = cr[WS(rs, 9)];
Chris@82 423 {
Chris@82 424 E Ti, Tj, Tu, Tv;
Chris@82 425 Ti = cr[WS(rs, 1)];
Chris@82 426 Tj = cr[WS(rs, 5)];
Chris@82 427 Tk = Ti + Tj;
Chris@82 428 T1a = KP866025403 * (Ti - Tj);
Chris@82 429 Tu = ci[WS(rs, 10)];
Chris@82 430 Tv = ci[WS(rs, 6)];
Chris@82 431 Tw = Tu + Tv;
Chris@82 432 T1r = KP866025403 * (Tv - Tu);
Chris@82 433 }
Chris@82 434 Tl = Th + Tk;
Chris@82 435 Ty = Tw - Tx;
Chris@82 436 T1b = FMA(KP500000000, Tw, Tx);
Chris@82 437 T1c = T1a - T1b;
Chris@82 438 T1Y = T1a + T1b;
Chris@82 439 T1q = FNMS(KP500000000, Tk, Th);
Chris@82 440 T1s = T1q + T1r;
Chris@82 441 T1Q = T1q - T1r;
Chris@82 442 }
/* All 24 inputs are now held in temporaries. From here on cr/ci are only
 * written, never read, so the in-place stores cannot clobber a pending input.
 * Element 0 is stored without a twiddle multiply; every other element k is
 * combined with the twiddle pair (W[2k - 2], W[2k - 1]). */
/* Elements 0 (no twiddle) and 6 (W[10], W[11]). */
Chris@82 443 {
Chris@82 444 E Tb, Tm, TU, TW, TX, TY, TT, TV;
Chris@82 445 Tb = T5 + Ta;
Chris@82 446 Tm = Tg + Tl;
Chris@82 447 TU = Tb - Tm;
Chris@82 448 TW = TH + TM;
Chris@82 449 TX = Tt + Ty;
Chris@82 450 TY = TW - TX;
Chris@82 451 cr[0] = Tb + Tm;
Chris@82 452 ci[0] = TW + TX;
Chris@82 453 TT = W[10];
Chris@82 454 TV = W[11];
Chris@82 455 cr[WS(rs, 6)] = FNMS(TV, TY, TT * TU);
Chris@82 456 ci[WS(rs, 6)] = FMA(TV, TU, TT * TY);
Chris@82 457 }
/* Output elements 9 (W[16], W[17]) and 3 (W[4], W[5]). */
Chris@82 458 {
Chris@82 459 E TA, TQ, TO, TS;
Chris@82 460 {
Chris@82 461 E To, Tz, TC, TN;
Chris@82 462 To = T5 - Ta;
Chris@82 463 Tz = Tt - Ty;
Chris@82 464 TA = To - Tz;
Chris@82 465 TQ = To + Tz;
Chris@82 466 TC = Tg - Tl;
Chris@82 467 TN = TH - TM;
Chris@82 468 TO = TC + TN;
Chris@82 469 TS = TN - TC;
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E Tn, TB, TP, TR;
Chris@82 473 Tn = W[16];
Chris@82 474 TB = W[17];
Chris@82 475 cr[WS(rs, 9)] = FNMS(TB, TO, Tn * TA);
Chris@82 476 ci[WS(rs, 9)] = FMA(Tn, TO, TB * TA);
Chris@82 477 TP = W[4];
Chris@82 478 TR = W[5];
Chris@82 479 cr[WS(rs, 3)] = FNMS(TR, TS, TP * TQ);
Chris@82 480 ci[WS(rs, 3)] = FMA(TP, TS, TR * TQ);
Chris@82 481 }
Chris@82 482 }
/* Output elements 5 (W[8], W[9]) and 11 (W[20], W[21]). */
Chris@82 483 {
Chris@82 484 E T28, T2e, T2c, T2g;
Chris@82 485 {
Chris@82 486 E T26, T27, T2a, T2b;
Chris@82 487 T26 = T1M - T1N;
Chris@82 488 T27 = T1X + T1Y;
Chris@82 489 T28 = T26 - T27;
Chris@82 490 T2e = T26 + T27;
Chris@82 491 T2a = T1U + T1V;
Chris@82 492 T2b = T1P - T1Q;
Chris@82 493 T2c = T2a + T2b;
Chris@82 494 T2g = T2a - T2b;
Chris@82 495 }
Chris@82 496 {
Chris@82 497 E T25, T29, T2d, T2f;
Chris@82 498 T25 = W[8];
Chris@82 499 T29 = W[9];
Chris@82 500 cr[WS(rs, 5)] = FNMS(T29, T2c, T25 * T28);
Chris@82 501 ci[WS(rs, 5)] = FMA(T25, T2c, T29 * T28);
Chris@82 502 T2d = W[20];
Chris@82 503 T2f = W[21];
Chris@82 504 cr[WS(rs, 11)] = FNMS(T2f, T2g, T2d * T2e);
Chris@82 505 ci[WS(rs, 11)] = FMA(T2d, T2g, T2f * T2e);
Chris@82 506 }
Chris@82 507 }
/* Output elements 2 (W[2], W[3]) and 8 (W[14], W[15]). */
Chris@82 508 {
Chris@82 509 E T1S, T22, T20, T24;
Chris@82 510 {
Chris@82 511 E T1O, T1R, T1W, T1Z;
Chris@82 512 T1O = T1M + T1N;
Chris@82 513 T1R = T1P + T1Q;
Chris@82 514 T1S = T1O - T1R;
Chris@82 515 T22 = T1O + T1R;
Chris@82 516 T1W = T1U - T1V;
Chris@82 517 T1Z = T1X - T1Y;
Chris@82 518 T20 = T1W - T1Z;
Chris@82 519 T24 = T1W + T1Z;
Chris@82 520 }
Chris@82 521 {
Chris@82 522 E T1L, T1T, T21, T23;
Chris@82 523 T1L = W[2];
Chris@82 524 T1T = W[3];
Chris@82 525 cr[WS(rs, 2)] = FNMS(T1T, T20, T1L * T1S);
Chris@82 526 ci[WS(rs, 2)] = FMA(T1T, T1S, T1L * T20);
Chris@82 527 T21 = W[14];
Chris@82 528 T23 = W[15];
Chris@82 529 cr[WS(rs, 8)] = FNMS(T23, T24, T21 * T22);
Chris@82 530 ci[WS(rs, 8)] = FMA(T23, T22, T21 * T24);
Chris@82 531 }
Chris@82 532 }
/* Output elements 10 (W[18], W[19]) and 4 (W[6], W[7]). */
Chris@82 533 {
Chris@82 534 E T1C, T1I, T1G, T1K;
Chris@82 535 {
Chris@82 536 E T1A, T1B, T1E, T1F;
Chris@82 537 T1A = T12 + T15;
Chris@82 538 T1B = T1p + T1s;
Chris@82 539 T1C = T1A - T1B;
Chris@82 540 T1I = T1A + T1B;
Chris@82 541 T1E = T1i + T1l;
Chris@82 542 T1F = T19 + T1c;
Chris@82 543 T1G = T1E - T1F;
Chris@82 544 T1K = T1E + T1F;
Chris@82 545 }
Chris@82 546 {
Chris@82 547 E T1z, T1D, T1H, T1J;
Chris@82 548 T1z = W[18];
Chris@82 549 T1D = W[19];
Chris@82 550 cr[WS(rs, 10)] = FNMS(T1D, T1G, T1z * T1C);
Chris@82 551 ci[WS(rs, 10)] = FMA(T1D, T1C, T1z * T1G);
Chris@82 552 T1H = W[6];
Chris@82 553 T1J = W[7];
Chris@82 554 cr[WS(rs, 4)] = FNMS(T1J, T1K, T1H * T1I);
Chris@82 555 ci[WS(rs, 4)] = FMA(T1J, T1I, T1H * T1K);
Chris@82 556 }
Chris@82 557 }
/* Output elements 1 (W[0], W[1]) and 7 (W[12], W[13]). */
Chris@82 558 {
Chris@82 559 E T1e, T1w, T1u, T1y;
Chris@82 560 {
Chris@82 561 E T16, T1d, T1m, T1t;
Chris@82 562 T16 = T12 - T15;
Chris@82 563 T1d = T19 - T1c;
Chris@82 564 T1e = T16 - T1d;
Chris@82 565 T1w = T16 + T1d;
Chris@82 566 T1m = T1i - T1l;
Chris@82 567 T1t = T1p - T1s;
Chris@82 568 T1u = T1m + T1t;
Chris@82 569 T1y = T1m - T1t;
Chris@82 570 }
Chris@82 571 {
Chris@82 572 E TZ, T1f, T1v, T1x;
Chris@82 573 TZ = W[0];
Chris@82 574 T1f = W[1];
Chris@82 575 cr[WS(rs, 1)] = FNMS(T1f, T1u, TZ * T1e);
Chris@82 576 ci[WS(rs, 1)] = FMA(TZ, T1u, T1f * T1e);
Chris@82 577 T1v = W[12];
Chris@82 578 T1x = W[13];
Chris@82 579 cr[WS(rs, 7)] = FNMS(T1x, T1y, T1v * T1w);
Chris@82 580 ci[WS(rs, 7)] = FMA(T1v, T1y, T1x * T1w);
Chris@82 581 }
Chris@82 582 }
Chris@82 583 }
Chris@82 584 }
Chris@82 585 }
Chris@82 586
/*
 * Twiddle instruction list referenced by the codelet descriptor below.
 * NOTE(review): presumably {TW_FULL, 1, 12} requests the full twiddle set for
 * size 12 (consistent with the 22 W entries hb_12 consumes per iteration) and
 * {TW_NEXT, 1, 0} ends the list; confirm against the tw_instr definition,
 * which is not part of this file.
 */
Chris@82 587 static const tw_instr twinstr[] = {
Chris@82 588 {TW_FULL, 1, 12},
Chris@82 589 {TW_NEXT, 1, 0}
Chris@82 590 };
Chris@82 591
/*
 * Descriptor for the non-FMA variant: size 12, name "hb_12", the twiddle
 * instruction list, and &GENUS. The counts {88, 30, 30, 0} repeat the figures
 * in the generated header comment (88 additions, 30 multiplications, 30 fused
 * multiply/add); the meaning of the fourth count is not visible in this file.
 */
Chris@82 592 static const hc2hc_desc desc = { 12, "hb_12", twinstr, &GENUS, {88, 30, 30, 0} };
Chris@82 593
/*
 * Registration entry point: hands the hb_12 kernel and its descriptor to
 * X(khc2hc_register) for planner p. X() is a macro defined elsewhere
 * (presumably a symbol-prefixing wrapper; confirm in the FFTW headers).
 */
Chris@82 594 void X(codelet_hb_12) (planner *p) {
Chris@82 595 X(khc2hc_register) (p, hb_12, &desc);
Chris@82 596 }
Chris@82 597 #endif