annotate src/fftw-3.3.5/rdft/scalar/r2cb/hc2cb_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:51:30 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cb_10 -include hc2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@42 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@42 33 * 71 stack variables, 4 constants, and 40 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cb.h"
Chris@42 36
/*
 * hc2cb_10 (HAVE_FMA variant): machine-generated size-10 codelet (see the
 * genfft command line above: -n 10 -sign 1 -dif).
 *
 * For each m in [mb, me) the loop body loads five elements (indices 0..4,
 * addressed through WS(rs, i)) from each of Rp, Ip, Rm and Im, combines
 * them, and stores five results back into each of the same four arrays.
 * Rp[0] and Rm[0] receive plain sums; every other output is formed from a
 * pair of intermediate values multiplied by a pair of twiddle values taken
 * from W[0..17].
 *
 * Parameters:
 *   Rp, Ip - read and written; advanced by +ms after each iteration.
 *   Rm, Im - read and written; advanced by -ms after each iteration.
 *   W      - read-only twiddle table; 18 values are used per iteration and
 *            the pointer is pre-offset by (mb - 1) * 18 before the loop.
 *   rs     - stride handed to WS() to address the five elements per array.
 *   mb, me - half-open iteration range [mb, me).
 *   ms     - pointer step applied to Rp/Ip (+ms) and Rm/Im (-ms).
 *
 * Within one iteration every load from Rp/Ip/Rm/Im precedes the first
 * store, so no output overwrites an input that is still to be read.
 */
Chris@42 37 static void hc2cb_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numerically: 0.951056516... = sin(2*pi/5), 0.559016994... = sqrt(5)/4,
 * 0.618033988... = (sqrt(5) - 1)/2. */
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 43 {
Chris@42 44 INT m;
Chris@42 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
/* Declared at loop scope because they are assigned deep inside the nested
 * blocks below and consumed by the final store at the end of the body. */
Chris@42 46 E T21, T1Y, T1X;
Chris@42 47 {
Chris@42 48 E T1B, TH, T1g, T3, T1V, T1x, T1G, T1E, TM, TK, T11, TB, T7, T1m, T1J;
Chris@42 49 E TO, Th, T1h, T6, T8, TF, TG, T1i, T9;
/* Load the Ip/Im inputs and form pairwise sums and differences. */
Chris@42 50 TF = Ip[0];
Chris@42 51 TG = Im[WS(rs, 4)];
Chris@42 52 {
Chris@42 53 E T1u, Tp, Tu, T1s, Tz, T1v, Ts, Tv;
Chris@42 54 {
Chris@42 55 E Tx, Ty, Tn, To, Tq, Tr;
Chris@42 56 Tn = Ip[WS(rs, 4)];
Chris@42 57 To = Im[0];
Chris@42 58 Tx = Ip[WS(rs, 3)];
Chris@42 59 T1B = TF + TG;
Chris@42 60 TH = TF - TG;
Chris@42 61 T1u = Tn + To;
Chris@42 62 Tp = Tn - To;
Chris@42 63 Ty = Im[WS(rs, 1)];
Chris@42 64 Tq = Ip[WS(rs, 1)];
Chris@42 65 Tr = Im[WS(rs, 3)];
Chris@42 66 Tu = Ip[WS(rs, 2)];
Chris@42 67 T1s = Tx + Ty;
Chris@42 68 Tz = Tx - Ty;
Chris@42 69 T1v = Tq + Tr;
Chris@42 70 Ts = Tq - Tr;
Chris@42 71 Tv = Im[WS(rs, 2)];
Chris@42 72 }
Chris@42 73 {
Chris@42 74 E T1, T1w, T1D, TJ, Tt, T1r, Tw, T2;
/* Start loading the Rp/Rm inputs while finishing the Ip/Im combinations. */
Chris@42 75 T1 = Rp[0];
Chris@42 76 T1w = T1u + T1v;
Chris@42 77 T1D = T1u - T1v;
Chris@42 78 TJ = Tp + Ts;
Chris@42 79 Tt = Tp - Ts;
Chris@42 80 T1r = Tu + Tv;
Chris@42 81 Tw = Tu - Tv;
Chris@42 82 T2 = Rm[WS(rs, 4)];
Chris@42 83 {
Chris@42 84 E Tb, Tc, Te, Tf;
Chris@42 85 Tb = Rp[WS(rs, 4)];
Chris@42 86 {
Chris@42 87 E T1t, T1C, TI, TA;
Chris@42 88 T1t = T1r + T1s;
Chris@42 89 T1C = T1r - T1s;
Chris@42 90 TI = Tw + Tz;
Chris@42 91 TA = Tw - Tz;
Chris@42 92 T1g = T1 - T2;
Chris@42 93 T3 = T1 + T2;
Chris@42 94 T1V = FNMS(KP618033988, T1t, T1w);
Chris@42 95 T1x = FMA(KP618033988, T1w, T1t);
Chris@42 96 T1G = T1C - T1D;
Chris@42 97 T1E = T1C + T1D;
Chris@42 98 TM = TI - TJ;
Chris@42 99 TK = TI + TJ;
Chris@42 100 T11 = FMA(KP618033988, Tt, TA);
Chris@42 101 TB = FNMS(KP618033988, TA, Tt);
Chris@42 102 Tc = Rm[0];
Chris@42 103 }
Chris@42 104 Te = Rm[WS(rs, 3)];
Chris@42 105 Tf = Rp[WS(rs, 1)];
Chris@42 106 {
Chris@42 107 E T4, T1k, Td, T1l, Tg, T5;
Chris@42 108 T4 = Rp[WS(rs, 2)];
Chris@42 109 T1k = Tb - Tc;
Chris@42 110 Td = Tb + Tc;
Chris@42 111 T1l = Te - Tf;
Chris@42 112 Tg = Te + Tf;
Chris@42 113 T5 = Rm[WS(rs, 2)];
Chris@42 114 T7 = Rm[WS(rs, 1)];
Chris@42 115 T1m = T1k + T1l;
Chris@42 116 T1J = T1k - T1l;
Chris@42 117 TO = Td - Tg;
Chris@42 118 Th = Td + Tg;
Chris@42 119 T1h = T4 - T5;
Chris@42 120 T6 = T4 + T5;
Chris@42 121 T8 = Rp[WS(rs, 3)];
Chris@42 122 }
Chris@42 123 }
Chris@42 124 }
Chris@42 125 }
/* First store of the iteration: all Rp/Ip/Rm/Im inputs are in locals now.
 * Rm[0] is written without any twiddle multiplication. */
Chris@42 126 Rm[0] = TH + TK;
Chris@42 127 T1i = T7 - T8;
Chris@42 128 T9 = T7 + T8;
Chris@42 129 {
Chris@42 130 E T2d, T1F, T29, T1I, TP, T2c, T1p, Tl, T1o, Tk, T2b, T2e, T17, T14, T13;
Chris@42 131 T2d = T1B + T1E;
Chris@42 132 T1F = FNMS(KP250000000, T1E, T1B);
Chris@42 133 {
Chris@42 134 E T1j, Ta, T1n, Ti, T2a;
Chris@42 135 T29 = W[8];
Chris@42 136 T1I = T1h - T1i;
Chris@42 137 T1j = T1h + T1i;
Chris@42 138 TP = T6 - T9;
Chris@42 139 Ta = T6 + T9;
Chris@42 140 T2c = W[9];
Chris@42 141 T1p = T1j - T1m;
Chris@42 142 T1n = T1j + T1m;
Chris@42 143 Tl = Ta - Th;
Chris@42 144 Ti = Ta + Th;
Chris@42 145 T1o = FNMS(KP250000000, T1n, T1g);
Chris@42 146 T2a = T1g + T1n;
/* Rp[0] is likewise a plain sum with no twiddle factor applied. */
Chris@42 147 Rp[0] = T3 + Ti;
Chris@42 148 Tk = FNMS(KP250000000, Ti, T3);
Chris@42 149 T2b = T29 * T2a;
Chris@42 150 T2e = T2c * T2a;
Chris@42 151 }
Chris@42 152 {
Chris@42 153 E T16, TQ, T10, Tm, TL;
Chris@42 154 T16 = FMA(KP618033988, TO, TP);
Chris@42 155 TQ = FNMS(KP618033988, TP, TO);
/* Ip[2]/Im[2]: the pair (T2a, T2d) combined with twiddles W[8], W[9]. */
Chris@42 156 Ip[WS(rs, 2)] = FNMS(T2c, T2d, T2b);
Chris@42 157 Im[WS(rs, 2)] = FMA(T29, T2d, T2e);
Chris@42 158 T10 = FMA(KP559016994, Tl, Tk);
Chris@42 159 Tm = FNMS(KP559016994, Tl, Tk);
Chris@42 160 TL = FNMS(KP250000000, TK, TH);
/* Remaining Rp/Rm outputs (indices 1..4), using W[2..3], W[6..7],
 * W[10..11] and W[14..15]. */
Chris@42 161 {
Chris@42 162 E TE, TU, T12, TR, TX, T1d, T1c, T19, TD, T1e, T1b, TW, TT;
Chris@42 163 {
Chris@42 164 E TC, T15, T1a, TS, Tj, TN;
Chris@42 165 TE = W[3];
Chris@42 166 TC = FMA(KP951056516, TB, Tm);
Chris@42 167 TU = FNMS(KP951056516, TB, Tm);
Chris@42 168 TN = FNMS(KP559016994, TM, TL);
Chris@42 169 T15 = FMA(KP559016994, TM, TL);
Chris@42 170 T12 = FMA(KP951056516, T11, T10);
Chris@42 171 T1a = FNMS(KP951056516, T11, T10);
Chris@42 172 TS = TE * TC;
Chris@42 173 TR = FNMS(KP951056516, TQ, TN);
Chris@42 174 TX = FMA(KP951056516, TQ, TN);
Chris@42 175 Tj = W[2];
Chris@42 176 T1d = FMA(KP951056516, T16, T15);
Chris@42 177 T17 = FNMS(KP951056516, T16, T15);
Chris@42 178 T1c = W[11];
Chris@42 179 T19 = W[10];
Chris@42 180 Rm[WS(rs, 1)] = FMA(Tj, TR, TS);
Chris@42 181 TD = Tj * TC;
Chris@42 182 T1e = T1c * T1a;
Chris@42 183 T1b = T19 * T1a;
Chris@42 184 }
Chris@42 185 Rp[WS(rs, 1)] = FNMS(TE, TR, TD);
Chris@42 186 Rm[WS(rs, 3)] = FMA(T19, T1d, T1e);
Chris@42 187 Rp[WS(rs, 3)] = FNMS(T1c, T1d, T1b);
Chris@42 188 TW = W[15];
Chris@42 189 TT = W[14];
Chris@42 190 {
Chris@42 191 E TZ, T18, TY, TV;
Chris@42 192 T14 = W[7];
Chris@42 193 TY = TW * TU;
Chris@42 194 TV = TT * TU;
Chris@42 195 TZ = W[6];
Chris@42 196 T18 = T14 * T12;
Chris@42 197 Rm[WS(rs, 4)] = FMA(TT, TX, TY);
Chris@42 198 Rp[WS(rs, 4)] = FNMS(TW, TX, TV);
Chris@42 199 T13 = TZ * T12;
Chris@42 200 Rm[WS(rs, 2)] = FMA(TZ, T17, T18);
Chris@42 201 }
Chris@42 202 }
Chris@42 203 }
/* Remaining Ip/Im outputs (indices 0, 1, 3, 4), using W[0..1], W[4..5],
 * W[12..13] and W[16..17]. */
Chris@42 204 {
Chris@42 205 E T20, T1K, T1q, T1U;
Chris@42 206 T20 = FNMS(KP618033988, T1I, T1J);
Chris@42 207 T1K = FMA(KP618033988, T1J, T1I);
Chris@42 208 Rp[WS(rs, 2)] = FNMS(T14, T17, T13);
Chris@42 209 T1q = FMA(KP559016994, T1p, T1o);
Chris@42 210 T1U = FNMS(KP559016994, T1p, T1o);
Chris@42 211 {
Chris@42 212 E T1A, T1O, T1W, T1R, T1L, T27, T26, T23, T1z, T28, T25, T1Q, T1N;
Chris@42 213 {
Chris@42 214 E T1y, T1Z, T24, T1M, T1f, T1H;
Chris@42 215 T1A = W[1];
Chris@42 216 T1O = FMA(KP951056516, T1x, T1q);
Chris@42 217 T1y = FNMS(KP951056516, T1x, T1q);
Chris@42 218 T1Z = FNMS(KP559016994, T1G, T1F);
Chris@42 219 T1H = FMA(KP559016994, T1G, T1F);
Chris@42 220 T24 = FMA(KP951056516, T1V, T1U);
Chris@42 221 T1W = FNMS(KP951056516, T1V, T1U);
Chris@42 222 T1M = T1A * T1y;
Chris@42 223 T1R = FNMS(KP951056516, T1K, T1H);
Chris@42 224 T1L = FMA(KP951056516, T1K, T1H);
Chris@42 225 T1f = W[0];
Chris@42 226 T21 = FMA(KP951056516, T20, T1Z);
Chris@42 227 T27 = FNMS(KP951056516, T20, T1Z);
Chris@42 228 T26 = W[13];
Chris@42 229 T23 = W[12];
Chris@42 230 Im[0] = FMA(T1f, T1L, T1M);
Chris@42 231 T1z = T1f * T1y;
Chris@42 232 T28 = T26 * T24;
Chris@42 233 T25 = T23 * T24;
Chris@42 234 }
Chris@42 235 Ip[0] = FNMS(T1A, T1L, T1z);
Chris@42 236 Im[WS(rs, 3)] = FMA(T23, T27, T28);
Chris@42 237 Ip[WS(rs, 3)] = FNMS(T26, T27, T25);
Chris@42 238 T1Q = W[17];
Chris@42 239 T1N = W[16];
Chris@42 240 {
Chris@42 241 E T1T, T22, T1S, T1P;
Chris@42 242 T1Y = W[5];
Chris@42 243 T1S = T1Q * T1O;
Chris@42 244 T1P = T1N * T1O;
Chris@42 245 T1T = W[4];
Chris@42 246 T22 = T1Y * T1W;
Chris@42 247 Im[WS(rs, 4)] = FMA(T1N, T1R, T1S);
Chris@42 248 Ip[WS(rs, 4)] = FNMS(T1Q, T1R, T1P);
Chris@42 249 T1X = T1T * T1W;
Chris@42 250 Im[WS(rs, 1)] = FMA(T1T, T21, T22);
Chris@42 251 }
Chris@42 252 }
Chris@42 253 }
Chris@42 254 }
Chris@42 255 }
/* Last store of the iteration; uses the loop-scope temporaries set above. */
Chris@42 256 Ip[WS(rs, 1)] = FNMS(T1Y, T21, T1X);
Chris@42 257 }
Chris@42 258 }
Chris@42 259 }
Chris@42 260
/*
 * Twiddle instruction table referenced by the codelet descriptor below:
 * a TW_FULL entry with arguments (1, 10) followed by a TW_NEXT entry with
 * arguments (1, 0).
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for a
 * size-10 codelet (the kernel consumes 18 values of W per iteration) and
 * TW_NEXT terminates the list -- confirm against the tw_instr definition.
 */
Chris@42 261 static const tw_instr twinstr[] = {
Chris@42 262 {TW_FULL, 1, 10},
Chris@42 263 {TW_NEXT, 1, 0}
Chris@42 264 };
Chris@42 265
/*
 * Descriptor handed to the planner for the HAVE_FMA kernel: the value 10,
 * the name string "hc2cb_10", the twiddle instruction table, &GENUS, and
 * the counts {48, 18, 54, 0}. The first three counts match the generator
 * comment for this variant (48 additions, 18 multiplications, 54 fused
 * multiply/add).
 * NOTE(review): the leading 10 is presumably the codelet size and the
 * trailing 0 an unused fourth counter -- confirm against hc2c_desc.
 */
Chris@42 266 static const hc2c_desc desc = { 10, "hc2cb_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@42 267
/*
 * Registration entry point (HAVE_FMA build): hands the hc2cb_10 kernel and
 * its descriptor to planner p through khc2c_register, tagged with
 * HC2C_VIA_RDFT. Both the function name and the callee go through the X()
 * name macro.
 */
Chris@42 268 void X(codelet_hc2cb_10) (planner *p) {
Chris@42 269 X(khc2c_register) (p, hc2cb_10, &desc, HC2C_VIA_RDFT);
Chris@42 270 }
Chris@42 271 #else /* HAVE_FMA */
Chris@42 272
Chris@42 273 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cb_10 -include hc2cb.h */
Chris@42 274
Chris@42 275 /*
Chris@42 276 * This function contains 102 FP additions, 60 FP multiplications,
Chris@42 277 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@42 278 * 39 stack variables, 4 constants, and 40 memory accesses
Chris@42 279 */
Chris@42 280 #include "hc2cb.h"
Chris@42 281
/*
 * hc2cb_10 (non-FMA variant): machine-generated size-10 codelet (see the
 * genfft command line above: -n 10 -sign 1 -dif). It has the same
 * signature as the HAVE_FMA variant and reads and writes the same array
 * elements.
 *
 * For each m in [mb, me) the loop body loads five elements (indices 0..4,
 * addressed through WS(rs, i)) from each of Rp, Ip, Rm and Im, combines
 * them, and stores five results back into each of the same four arrays.
 * Rp[0] and Rm[0] receive plain sums; every other output is formed from a
 * pair of intermediate values multiplied by a pair of twiddle values taken
 * from W[0..17].
 *
 * Parameters:
 *   Rp, Ip - read and written; advanced by +ms after each iteration.
 *   Rm, Im - read and written; advanced by -ms after each iteration.
 *   W      - read-only twiddle table; 18 values are used per iteration and
 *            the pointer is pre-offset by (mb - 1) * 18 before the loop.
 *   rs     - stride handed to WS() to address the five elements per array.
 *   mb, me - half-open iteration range [mb, me).
 *   ms     - pointer step applied to Rp/Ip (+ms) and Rm/Im (-ms).
 *
 * Within one iteration every load from Rp/Ip/Rm/Im precedes the first
 * store, so no output overwrites an input that is still to be read.
 */
Chris@42 282 static void hc2cb_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 283 {
/* Numerically: 0.951056516... = sin(2*pi/5), 0.587785252... = sin(pi/5),
 * 0.559016994... = sqrt(5)/4. */
Chris@42 284 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 285 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 286 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 287 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 288 {
Chris@42 289 INT m;
Chris@42 290 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
Chris@42 291 E T3, T18, TJ, T1i, TE, TF, T1B, T1A, T1f, T1t, Ti, Tl, Tt, TA, T1w;
Chris@42 292 E T1v, T1p, T1E, TM, TO;
/* Rp[0]/Rm[4] and Ip[0]/Im[4]: sum and difference of each pair. */
Chris@42 293 {
Chris@42 294 E T1, T2, TH, TI;
Chris@42 295 T1 = Rp[0];
Chris@42 296 T2 = Rm[WS(rs, 4)];
Chris@42 297 T3 = T1 + T2;
Chris@42 298 T18 = T1 - T2;
Chris@42 299 TH = Ip[0];
Chris@42 300 TI = Im[WS(rs, 4)];
Chris@42 301 TJ = TH - TI;
Chris@42 302 T1i = TH + TI;
Chris@42 303 }
/* Remaining Rp/Rm inputs: pairwise sums/differences, then second-level
 * combinations kept for the output stage. */
Chris@42 304 {
Chris@42 305 E T6, T19, Tg, T1d, T9, T1a, Td, T1c;
Chris@42 306 {
Chris@42 307 E T4, T5, Te, Tf;
Chris@42 308 T4 = Rp[WS(rs, 2)];
Chris@42 309 T5 = Rm[WS(rs, 2)];
Chris@42 310 T6 = T4 + T5;
Chris@42 311 T19 = T4 - T5;
Chris@42 312 Te = Rm[WS(rs, 3)];
Chris@42 313 Tf = Rp[WS(rs, 1)];
Chris@42 314 Tg = Te + Tf;
Chris@42 315 T1d = Te - Tf;
Chris@42 316 }
Chris@42 317 {
Chris@42 318 E T7, T8, Tb, Tc;
Chris@42 319 T7 = Rm[WS(rs, 1)];
Chris@42 320 T8 = Rp[WS(rs, 3)];
Chris@42 321 T9 = T7 + T8;
Chris@42 322 T1a = T7 - T8;
Chris@42 323 Tb = Rp[WS(rs, 4)];
Chris@42 324 Tc = Rm[0];
Chris@42 325 Td = Tb + Tc;
Chris@42 326 T1c = Tb - Tc;
Chris@42 327 }
Chris@42 328 TE = T6 - T9;
Chris@42 329 TF = Td - Tg;
Chris@42 330 T1B = T1c - T1d;
Chris@42 331 T1A = T19 - T1a;
Chris@42 332 {
Chris@42 333 E T1b, T1e, Ta, Th;
Chris@42 334 T1b = T19 + T1a;
Chris@42 335 T1e = T1c + T1d;
Chris@42 336 T1f = T1b + T1e;
Chris@42 337 T1t = KP559016994 * (T1b - T1e);
Chris@42 338 Ta = T6 + T9;
Chris@42 339 Th = Td + Tg;
Chris@42 340 Ti = Ta + Th;
Chris@42 341 Tl = KP559016994 * (Ta - Th);
Chris@42 342 }
Chris@42 343 }
/* Remaining Ip/Im inputs, processed the same way as the Rp/Rm block. */
Chris@42 344 {
Chris@42 345 E Tp, T1j, Tz, T1n, Ts, T1k, Tw, T1m;
Chris@42 346 {
Chris@42 347 E Tn, To, Tx, Ty;
Chris@42 348 Tn = Ip[WS(rs, 2)];
Chris@42 349 To = Im[WS(rs, 2)];
Chris@42 350 Tp = Tn - To;
Chris@42 351 T1j = Tn + To;
Chris@42 352 Tx = Ip[WS(rs, 1)];
Chris@42 353 Ty = Im[WS(rs, 3)];
Chris@42 354 Tz = Tx - Ty;
Chris@42 355 T1n = Tx + Ty;
Chris@42 356 }
Chris@42 357 {
Chris@42 358 E Tq, Tr, Tu, Tv;
Chris@42 359 Tq = Ip[WS(rs, 3)];
Chris@42 360 Tr = Im[WS(rs, 1)];
Chris@42 361 Ts = Tq - Tr;
Chris@42 362 T1k = Tq + Tr;
Chris@42 363 Tu = Ip[WS(rs, 4)];
Chris@42 364 Tv = Im[0];
Chris@42 365 Tw = Tu - Tv;
Chris@42 366 T1m = Tu + Tv;
Chris@42 367 }
Chris@42 368 Tt = Tp - Ts;
Chris@42 369 TA = Tw - Tz;
Chris@42 370 T1w = T1m + T1n;
Chris@42 371 T1v = T1j + T1k;
Chris@42 372 {
Chris@42 373 E T1l, T1o, TK, TL;
Chris@42 374 T1l = T1j - T1k;
Chris@42 375 T1o = T1m - T1n;
Chris@42 376 T1p = T1l + T1o;
Chris@42 377 T1E = KP559016994 * (T1l - T1o);
Chris@42 378 TK = Tp + Ts;
Chris@42 379 TL = Tw + Tz;
Chris@42 380 TM = TK + TL;
Chris@42 381 TO = KP559016994 * (TK - TL);
Chris@42 382 }
Chris@42 383 }
/* First stores of the iteration: every array input is in a local by now.
 * These two outputs are plain sums with no twiddle factor applied. */
Chris@42 384 Rp[0] = T3 + Ti;
Chris@42 385 Rm[0] = TJ + TM;
/* Ip[2]/Im[2]: the pair (T1g, T1q) combined with twiddles W[8], W[9]. */
Chris@42 386 {
Chris@42 387 E T1g, T1q, T17, T1h;
Chris@42 388 T1g = T18 + T1f;
Chris@42 389 T1q = T1i + T1p;
Chris@42 390 T17 = W[8];
Chris@42 391 T1h = W[9];
Chris@42 392 Ip[WS(rs, 2)] = FNMS(T1h, T1q, T17 * T1g);
Chris@42 393 Im[WS(rs, 2)] = FMA(T1h, T1g, T17 * T1q);
Chris@42 394 }
/* Remaining Rp/Rm outputs (indices 1..4), using W[2..3], W[6..7],
 * W[10..11] and W[14..15]. */
Chris@42 395 {
Chris@42 396 E TB, TG, T11, TX, TP, T10, Tm, TW, TN, Tk;
Chris@42 397 TB = FNMS(KP951056516, TA, KP587785252 * Tt);
Chris@42 398 TG = FNMS(KP951056516, TF, KP587785252 * TE);
Chris@42 399 T11 = FMA(KP951056516, TE, KP587785252 * TF);
Chris@42 400 TX = FMA(KP951056516, Tt, KP587785252 * TA);
Chris@42 401 TN = FNMS(KP250000000, TM, TJ);
Chris@42 402 TP = TN - TO;
Chris@42 403 T10 = TO + TN;
Chris@42 404 Tk = FNMS(KP250000000, Ti, T3);
Chris@42 405 Tm = Tk - Tl;
Chris@42 406 TW = Tl + Tk;
Chris@42 407 {
Chris@42 408 E TC, TQ, Tj, TD;
Chris@42 409 TC = Tm - TB;
Chris@42 410 TQ = TG + TP;
Chris@42 411 Tj = W[2];
Chris@42 412 TD = W[3];
Chris@42 413 Rp[WS(rs, 1)] = FNMS(TD, TQ, Tj * TC);
Chris@42 414 Rm[WS(rs, 1)] = FMA(TD, TC, Tj * TQ);
Chris@42 415 }
Chris@42 416 {
Chris@42 417 E T14, T16, T13, T15;
Chris@42 418 T14 = TW - TX;
Chris@42 419 T16 = T11 + T10;
Chris@42 420 T13 = W[10];
Chris@42 421 T15 = W[11];
Chris@42 422 Rp[WS(rs, 3)] = FNMS(T15, T16, T13 * T14);
Chris@42 423 Rm[WS(rs, 3)] = FMA(T15, T14, T13 * T16);
Chris@42 424 }
Chris@42 425 {
Chris@42 426 E TS, TU, TR, TT;
Chris@42 427 TS = Tm + TB;
Chris@42 428 TU = TP - TG;
Chris@42 429 TR = W[14];
Chris@42 430 TT = W[15];
Chris@42 431 Rp[WS(rs, 4)] = FNMS(TT, TU, TR * TS);
Chris@42 432 Rm[WS(rs, 4)] = FMA(TT, TS, TR * TU);
Chris@42 433 }
Chris@42 434 {
Chris@42 435 E TY, T12, TV, TZ;
Chris@42 436 TY = TW + TX;
Chris@42 437 T12 = T10 - T11;
Chris@42 438 TV = W[6];
Chris@42 439 TZ = W[7];
Chris@42 440 Rp[WS(rs, 2)] = FNMS(TZ, T12, TV * TY);
Chris@42 441 Rm[WS(rs, 2)] = FMA(TZ, TY, TV * T12);
Chris@42 442 }
Chris@42 443 }
/* Remaining Ip/Im outputs (indices 0, 1, 3, 4), using W[0..1], W[4..5],
 * W[12..13] and W[16..17]. */
Chris@42 444 {
Chris@42 445 E T1x, T1C, T1Q, T1N, T1F, T1R, T1u, T1M, T1D, T1s;
Chris@42 446 T1x = FNMS(KP951056516, T1w, KP587785252 * T1v);
Chris@42 447 T1C = FNMS(KP951056516, T1B, KP587785252 * T1A);
Chris@42 448 T1Q = FMA(KP951056516, T1A, KP587785252 * T1B);
Chris@42 449 T1N = FMA(KP951056516, T1v, KP587785252 * T1w);
Chris@42 450 T1D = FNMS(KP250000000, T1p, T1i);
Chris@42 451 T1F = T1D - T1E;
Chris@42 452 T1R = T1E + T1D;
Chris@42 453 T1s = FNMS(KP250000000, T1f, T18);
Chris@42 454 T1u = T1s - T1t;
Chris@42 455 T1M = T1t + T1s;
Chris@42 456 {
Chris@42 457 E T1y, T1G, T1r, T1z;
Chris@42 458 T1y = T1u - T1x;
Chris@42 459 T1G = T1C + T1F;
Chris@42 460 T1r = W[12];
Chris@42 461 T1z = W[13];
Chris@42 462 Ip[WS(rs, 3)] = FNMS(T1z, T1G, T1r * T1y);
Chris@42 463 Im[WS(rs, 3)] = FMA(T1r, T1G, T1z * T1y);
Chris@42 464 }
Chris@42 465 {
Chris@42 466 E T1U, T1W, T1T, T1V;
Chris@42 467 T1U = T1M + T1N;
Chris@42 468 T1W = T1R - T1Q;
Chris@42 469 T1T = W[16];
Chris@42 470 T1V = W[17];
Chris@42 471 Ip[WS(rs, 4)] = FNMS(T1V, T1W, T1T * T1U);
Chris@42 472 Im[WS(rs, 4)] = FMA(T1T, T1W, T1V * T1U);
Chris@42 473 }
Chris@42 474 {
Chris@42 475 E T1I, T1K, T1H, T1J;
Chris@42 476 T1I = T1u + T1x;
Chris@42 477 T1K = T1F - T1C;
Chris@42 478 T1H = W[4];
Chris@42 479 T1J = W[5];
Chris@42 480 Ip[WS(rs, 1)] = FNMS(T1J, T1K, T1H * T1I);
Chris@42 481 Im[WS(rs, 1)] = FMA(T1H, T1K, T1J * T1I);
Chris@42 482 }
Chris@42 483 {
Chris@42 484 E T1O, T1S, T1L, T1P;
Chris@42 485 T1O = T1M - T1N;
Chris@42 486 T1S = T1Q + T1R;
Chris@42 487 T1L = W[0];
Chris@42 488 T1P = W[1];
Chris@42 489 Ip[0] = FNMS(T1P, T1S, T1L * T1O);
Chris@42 490 Im[0] = FMA(T1L, T1S, T1P * T1O);
Chris@42 491 }
Chris@42 492 }
Chris@42 493 }
Chris@42 494 }
Chris@42 495 }
Chris@42 496
/*
 * Twiddle instruction table referenced by the codelet descriptor below:
 * a TW_FULL entry with arguments (1, 10) followed by a TW_NEXT entry with
 * arguments (1, 0). Identical to the table in the HAVE_FMA branch.
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for a
 * size-10 codelet (the kernel consumes 18 values of W per iteration) and
 * TW_NEXT terminates the list -- confirm against the tw_instr definition.
 */
Chris@42 497 static const tw_instr twinstr[] = {
Chris@42 498 {TW_FULL, 1, 10},
Chris@42 499 {TW_NEXT, 1, 0}
Chris@42 500 };
Chris@42 501
/*
 * Descriptor handed to the planner for the non-FMA kernel: the value 10,
 * the name string "hc2cb_10", the twiddle instruction table, &GENUS, and
 * the counts {72, 30, 30, 0}. The first three counts match the generator
 * comment for this variant (72 additions, 30 multiplications, 30 fused
 * multiply/add).
 * NOTE(review): the leading 10 is presumably the codelet size and the
 * trailing 0 an unused fourth counter -- confirm against hc2c_desc.
 */
Chris@42 502 static const hc2c_desc desc = { 10, "hc2cb_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@42 503
/*
 * Registration entry point (non-FMA build): hands the hc2cb_10 kernel and
 * its descriptor to planner p through khc2c_register, tagged with
 * HC2C_VIA_RDFT. Both the function name and the callee go through the X()
 * name macro.
 */
Chris@42 504 void X(codelet_hc2cb_10) (planner *p) {
Chris@42 505 X(khc2c_register) (p, hc2cb_10, &desc, HC2C_VIA_RDFT);
Chris@42 506 }
Chris@42 507 #endif /* HAVE_FMA */