annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cb_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:52 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cb_10 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@82 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@82 33 * 47 stack variables, 4 constants, and 40 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cb_10 (FMA variant): compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) the loop body loads Rp[WS(rs, k)], Ip[WS(rs, k)],
 * Rm[WS(rs, k)] and Im[WS(rs, k)] for k = 0..4 (20 loads), combines them
 * using the four DK constants, and stores 20 results back into the same
 * locations, so the arrays are updated in place.  All loads of an
 * iteration happen before its first store.
 *
 * Rp[0] and Rm[0] are stored as plain sums.  Each of the other nine
 * output pairs is combined with one consecutive pair W[2j], W[2j+1]
 * (j = 0..8), i.e. 18 values of W are read per iteration.
 *
 * Between iterations Rp and Ip advance by +ms, Rm and Im by -ms, and W by
 * 18; W is first offset by (mb - 1) * 18.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from the included headers and are not visible
 * here.  FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c) presumably
 * c - a * b -- confirm.  Judging by the generator flags (-n 10 -dif
 * -sign 1) this is presumably a radix-10 backward twiddle butterfly --
 * confirm against the genfft documentation.
 */
Chris@82 37 static void hc2cb_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numerically: sin(2*pi/5), sqrt(5)/4, 1/4 and (sqrt(5) - 1)/2. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
Chris@82 46 E TH, T1B, TB, T11, T1E, T1G, TK, TM, T1x, T1V, T3, T1g, Tl, T1I, T1J;
Chris@82 47 E TO, TP, T1p, Ti, Tk, T1n, T1o, TF, TG;
/* Load Ip[0] and Im[WS(rs, 4)]; keep their difference (TH) and sum (T1B). */
Chris@82 48 TF = Ip[0];
Chris@82 49 TG = Im[WS(rs, 4)];
Chris@82 50 TH = TF - TG;
Chris@82 51 T1B = TF + TG;
/* Load the remaining four Ip/Im pairs; form their differences
 * (Tp, Tz, Ts, Tw) and sums (T1u, T1s, T1v, T1r), then combine them. */
Chris@82 52 {
Chris@82 53 E Tp, T1u, Tz, T1s, Ts, T1v, Tw, T1r;
Chris@82 54 {
Chris@82 55 E Tn, To, Tx, Ty;
Chris@82 56 Tn = Ip[WS(rs, 4)];
Chris@82 57 To = Im[0];
Chris@82 58 Tp = Tn - To;
Chris@82 59 T1u = Tn + To;
Chris@82 60 Tx = Ip[WS(rs, 3)];
Chris@82 61 Ty = Im[WS(rs, 1)];
Chris@82 62 Tz = Tx - Ty;
Chris@82 63 T1s = Tx + Ty;
Chris@82 64 }
Chris@82 65 {
Chris@82 66 E Tq, Tr, Tu, Tv;
Chris@82 67 Tq = Ip[WS(rs, 1)];
Chris@82 68 Tr = Im[WS(rs, 3)];
Chris@82 69 Ts = Tq - Tr;
Chris@82 70 T1v = Tq + Tr;
Chris@82 71 Tu = Ip[WS(rs, 2)];
Chris@82 72 Tv = Im[WS(rs, 2)];
Chris@82 73 Tw = Tu - Tv;
Chris@82 74 T1r = Tu + Tv;
Chris@82 75 }
Chris@82 76 {
Chris@82 77 E Tt, TA, T1C, T1D;
Chris@82 78 Tt = Tp - Ts;
Chris@82 79 TA = Tw - Tz;
Chris@82 80 TB = FNMS(KP618033988, TA, Tt);
Chris@82 81 T11 = FMA(KP618033988, Tt, TA);
Chris@82 82 T1C = T1r - T1s;
Chris@82 83 T1D = T1u - T1v;
Chris@82 84 T1E = T1C + T1D;
Chris@82 85 T1G = T1C - T1D;
Chris@82 86 }
Chris@82 87 {
Chris@82 88 E TI, TJ, T1t, T1w;
Chris@82 89 TI = Tw + Tz;
Chris@82 90 TJ = Tp + Ts;
Chris@82 91 TK = TI + TJ;
Chris@82 92 TM = TI - TJ;
Chris@82 93 T1t = T1r + T1s;
Chris@82 94 T1w = T1u + T1v;
Chris@82 95 T1x = FMA(KP618033988, T1w, T1t);
Chris@82 96 T1V = FNMS(KP618033988, T1t, T1w);
Chris@82 97 }
Chris@82 98 }
/* Load the five Rp/Rm pairs; form their sums and differences. */
Chris@82 99 {
Chris@82 100 E Td, T1k, Tg, T1l, Th, T1m, T6, T1h, T9, T1i, Ta, T1j, T1, T2;
Chris@82 101 T1 = Rp[0];
Chris@82 102 T2 = Rm[WS(rs, 4)];
Chris@82 103 T3 = T1 + T2;
Chris@82 104 T1g = T1 - T2;
Chris@82 105 {
Chris@82 106 E Tb, Tc, Te, Tf;
Chris@82 107 Tb = Rp[WS(rs, 4)];
Chris@82 108 Tc = Rm[0];
Chris@82 109 Td = Tb + Tc;
Chris@82 110 T1k = Tb - Tc;
Chris@82 111 Te = Rm[WS(rs, 3)];
Chris@82 112 Tf = Rp[WS(rs, 1)];
Chris@82 113 Tg = Te + Tf;
Chris@82 114 T1l = Te - Tf;
Chris@82 115 }
Chris@82 116 Th = Td + Tg;
Chris@82 117 T1m = T1k + T1l;
Chris@82 118 {
Chris@82 119 E T4, T5, T7, T8;
Chris@82 120 T4 = Rp[WS(rs, 2)];
Chris@82 121 T5 = Rm[WS(rs, 2)];
Chris@82 122 T6 = T4 + T5;
Chris@82 123 T1h = T4 - T5;
Chris@82 124 T7 = Rm[WS(rs, 1)];
Chris@82 125 T8 = Rp[WS(rs, 3)];
Chris@82 126 T9 = T7 + T8;
Chris@82 127 T1i = T7 - T8;
Chris@82 128 }
Chris@82 129 Ta = T6 + T9;
Chris@82 130 T1j = T1h + T1i;
Chris@82 131 Tl = Ta - Th;
Chris@82 132 T1I = T1h - T1i;
Chris@82 133 T1J = T1k - T1l;
Chris@82 134 TO = Td - Tg;
Chris@82 135 TP = T6 - T9;
Chris@82 136 T1p = T1j - T1m;
Chris@82 137 Ti = Ta + Th;
Chris@82 138 Tk = FNMS(KP250000000, Ti, T3);
Chris@82 139 T1n = T1j + T1m;
Chris@82 140 T1o = FNMS(KP250000000, T1n, T1g);
Chris@82 141 }
/* First stores of the iteration: the only outputs written without W. */
Chris@82 142 Rp[0] = T3 + Ti;
Chris@82 143 Rm[0] = TH + TK;
/* Output pair Ip/Im[WS(rs, 2)], combined with W[8] and W[9]. */
Chris@82 144 {
Chris@82 145 E T2d, T29, T2b, T2c, T2e, T2a;
Chris@82 146 T2d = T1B + T1E;
Chris@82 147 T2a = T1g + T1n;
Chris@82 148 T29 = W[8];
Chris@82 149 T2b = T29 * T2a;
Chris@82 150 T2c = W[9];
Chris@82 151 T2e = T2c * T2a;
Chris@82 152 Ip[WS(rs, 2)] = FNMS(T2c, T2d, T2b);
Chris@82 153 Im[WS(rs, 2)] = FMA(T29, T2d, T2e);
Chris@82 154 }
/* Output pairs Rp/Rm[WS(rs, 1)] .. [WS(rs, 4)], combined with
 * W[2], W[3], W[6], W[7], W[10], W[11], W[14] and W[15]. */
Chris@82 155 {
Chris@82 156 E TQ, T16, TC, TU, TN, T15, T12, T1a, Tm, TL, T10;
Chris@82 157 TQ = FNMS(KP618033988, TP, TO);
Chris@82 158 T16 = FMA(KP618033988, TO, TP);
Chris@82 159 Tm = FNMS(KP559016994, Tl, Tk);
Chris@82 160 TC = FMA(KP951056516, TB, Tm);
Chris@82 161 TU = FNMS(KP951056516, TB, Tm);
Chris@82 162 TL = FNMS(KP250000000, TK, TH);
Chris@82 163 TN = FNMS(KP559016994, TM, TL);
Chris@82 164 T15 = FMA(KP559016994, TM, TL);
Chris@82 165 T10 = FMA(KP559016994, Tl, Tk);
Chris@82 166 T12 = FMA(KP951056516, T11, T10);
Chris@82 167 T1a = FNMS(KP951056516, T11, T10);
Chris@82 168 {
Chris@82 169 E TR, TE, TS, Tj, TD;
Chris@82 170 TR = FNMS(KP951056516, TQ, TN);
Chris@82 171 TE = W[3];
Chris@82 172 TS = TE * TC;
Chris@82 173 Tj = W[2];
Chris@82 174 TD = Tj * TC;
Chris@82 175 Rp[WS(rs, 1)] = FNMS(TE, TR, TD);
Chris@82 176 Rm[WS(rs, 1)] = FMA(Tj, TR, TS);
Chris@82 177 }
Chris@82 178 {
Chris@82 179 E T1d, T1c, T1e, T19, T1b;
Chris@82 180 T1d = FMA(KP951056516, T16, T15);
Chris@82 181 T1c = W[11];
Chris@82 182 T1e = T1c * T1a;
Chris@82 183 T19 = W[10];
Chris@82 184 T1b = T19 * T1a;
Chris@82 185 Rp[WS(rs, 3)] = FNMS(T1c, T1d, T1b);
Chris@82 186 Rm[WS(rs, 3)] = FMA(T19, T1d, T1e);
Chris@82 187 }
Chris@82 188 {
Chris@82 189 E TX, TW, TY, TT, TV;
Chris@82 190 TX = FMA(KP951056516, TQ, TN);
Chris@82 191 TW = W[15];
Chris@82 192 TY = TW * TU;
Chris@82 193 TT = W[14];
Chris@82 194 TV = TT * TU;
Chris@82 195 Rp[WS(rs, 4)] = FNMS(TW, TX, TV);
Chris@82 196 Rm[WS(rs, 4)] = FMA(TT, TX, TY);
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T17, T14, T18, TZ, T13;
Chris@82 200 T17 = FNMS(KP951056516, T16, T15);
Chris@82 201 T14 = W[7];
Chris@82 202 T18 = T14 * T12;
Chris@82 203 TZ = W[6];
Chris@82 204 T13 = TZ * T12;
Chris@82 205 Rp[WS(rs, 2)] = FNMS(T14, T17, T13);
Chris@82 206 Rm[WS(rs, 2)] = FMA(TZ, T17, T18);
Chris@82 207 }
Chris@82 208 }
/* Output pairs Ip/Im[0], [WS(rs, 1)], [WS(rs, 3)] and [WS(rs, 4)], combined
 * with W[0], W[1], W[4], W[5], W[12], W[13], W[16] and W[17]. */
Chris@82 209 {
Chris@82 210 E T1K, T20, T1y, T1O, T1H, T1Z, T1W, T24, T1q, T1F, T1U;
Chris@82 211 T1K = FMA(KP618033988, T1J, T1I);
Chris@82 212 T20 = FNMS(KP618033988, T1I, T1J);
Chris@82 213 T1q = FMA(KP559016994, T1p, T1o);
Chris@82 214 T1y = FNMS(KP951056516, T1x, T1q);
Chris@82 215 T1O = FMA(KP951056516, T1x, T1q);
Chris@82 216 T1F = FNMS(KP250000000, T1E, T1B);
Chris@82 217 T1H = FMA(KP559016994, T1G, T1F);
Chris@82 218 T1Z = FNMS(KP559016994, T1G, T1F);
Chris@82 219 T1U = FNMS(KP559016994, T1p, T1o);
Chris@82 220 T1W = FNMS(KP951056516, T1V, T1U);
Chris@82 221 T24 = FMA(KP951056516, T1V, T1U);
Chris@82 222 {
Chris@82 223 E T1L, T1A, T1M, T1f, T1z;
Chris@82 224 T1L = FMA(KP951056516, T1K, T1H);
Chris@82 225 T1A = W[1];
Chris@82 226 T1M = T1A * T1y;
Chris@82 227 T1f = W[0];
Chris@82 228 T1z = T1f * T1y;
Chris@82 229 Ip[0] = FNMS(T1A, T1L, T1z);
Chris@82 230 Im[0] = FMA(T1f, T1L, T1M);
Chris@82 231 }
Chris@82 232 {
Chris@82 233 E T27, T26, T28, T23, T25;
Chris@82 234 T27 = FNMS(KP951056516, T20, T1Z);
Chris@82 235 T26 = W[13];
Chris@82 236 T28 = T26 * T24;
Chris@82 237 T23 = W[12];
Chris@82 238 T25 = T23 * T24;
Chris@82 239 Ip[WS(rs, 3)] = FNMS(T26, T27, T25);
Chris@82 240 Im[WS(rs, 3)] = FMA(T23, T27, T28);
Chris@82 241 }
Chris@82 242 {
Chris@82 243 E T1R, T1Q, T1S, T1N, T1P;
Chris@82 244 T1R = FNMS(KP951056516, T1K, T1H);
Chris@82 245 T1Q = W[17];
Chris@82 246 T1S = T1Q * T1O;
Chris@82 247 T1N = W[16];
Chris@82 248 T1P = T1N * T1O;
Chris@82 249 Ip[WS(rs, 4)] = FNMS(T1Q, T1R, T1P);
Chris@82 250 Im[WS(rs, 4)] = FMA(T1N, T1R, T1S);
Chris@82 251 }
Chris@82 252 {
Chris@82 253 E T21, T1Y, T22, T1T, T1X;
Chris@82 254 T21 = FMA(KP951056516, T20, T1Z);
Chris@82 255 T1Y = W[5];
Chris@82 256 T22 = T1Y * T1W;
Chris@82 257 T1T = W[4];
Chris@82 258 T1X = T1T * T1W;
Chris@82 259 Ip[WS(rs, 1)] = FNMS(T1Y, T21, T1X);
Chris@82 260 Im[WS(rs, 1)] = FMA(T1T, T21, T22);
Chris@82 261 }
Chris@82 262 }
Chris@82 263 }
Chris@82 264 }
Chris@82 265 }
Chris@82 266
/*
 * Twiddle instruction table referenced by the codelet descriptor: one
 * {TW_FULL, 1, 10} entry followed by a {TW_NEXT, 1, 0} entry.
 * NOTE(review): presumably TW_FULL requests the full twiddle set for size
 * 10 and TW_NEXT ends the list -- confirm against the tw_instr definition,
 * which is not visible in this file.
 */
Chris@82 267 static const tw_instr twinstr[] = {
Chris@82 268 {TW_FULL, 1, 10},
Chris@82 269 {TW_NEXT, 1, 0}
Chris@82 270 };
Chris@82 271
/*
 * Codelet descriptor for the FMA variant: the literal 10, the name string
 * "hc2cb_10", the twinstr table, &GENUS, and the counts {48, 18, 54, 0}.
 * The first three counts equal the additions, multiplications and fused
 * multiply/adds quoted in this variant's generator statistics comment.
 * NOTE(review): the leading 10 is presumably the radix and the trailing 0
 * presumably an "other operations" count -- confirm against the hc2c_desc
 * definition, which is not visible in this file.
 */
Chris@82 272 static const hc2c_desc desc = { 10, "hc2cb_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@82 273
/*
 * Entry point for the FMA variant: passes the planner p, the hc2cb_10
 * function, its descriptor and HC2C_VIA_RDFT to X(khc2c_register).
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * confirm in the included headers.
 */
Chris@82 274 void X(codelet_hc2cb_10) (planner *p) {
Chris@82 275 X(khc2c_register) (p, hc2cb_10, &desc, HC2C_VIA_RDFT);
Chris@82 276 }
Chris@82 277 #else
Chris@82 278
Chris@82 279 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cb_10 -include rdft/scalar/hc2cb.h */
Chris@82 280
Chris@82 281 /*
Chris@82 282 * This function contains 102 FP additions, 60 FP multiplications,
Chris@82 283 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 284 * 39 stack variables, 4 constants, and 40 memory accesses
Chris@82 285 */
Chris@82 286 #include "rdft/scalar/hc2cb.h"
Chris@82 287
/*
 * hc2cb_10 (non-FMA variant): compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) the loop body loads Rp[WS(rs, k)], Ip[WS(rs, k)],
 * Rm[WS(rs, k)] and Im[WS(rs, k)] for k = 0..4 (20 loads), combines them
 * using the four DK constants, and stores 20 results back into the same
 * locations, so the arrays are updated in place.  All loads of an
 * iteration happen before its first store.
 *
 * Rp[0] and Rm[0] are stored as plain sums.  Each of the other nine
 * output pairs is combined with one consecutive pair W[2j], W[2j+1]
 * (j = 0..8), i.e. 18 values of W are read per iteration.
 *
 * Between iterations Rp and Ip advance by +ms, Rm and Im by -ms, and W by
 * 18; W is first offset by (mb - 1) * 18.
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from the included headers and are not visible
 * here.  FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c) presumably
 * c - a * b -- confirm.  Judging by the generator flags (-n 10 -dif
 * -sign 1) this is presumably a radix-10 backward twiddle butterfly --
 * confirm against the genfft documentation.
 */
Chris@82 288 static void hc2cb_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 289 {
/* Numerically: 1/4, sin(2*pi/5), sin(pi/5) and sqrt(5)/4. */
Chris@82 290 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 291 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 292 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 293 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 294 {
Chris@82 295 INT m;
Chris@82 296 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
Chris@82 297 E T3, T18, TJ, T1i, TE, TF, T1B, T1A, T1f, T1t, Ti, Tl, Tt, TA, T1w;
Chris@82 298 E T1v, T1p, T1E, TM, TO;
/* Load Rp[0], Rm[WS(rs, 4)], Ip[0] and Im[WS(rs, 4)]; keep the sum and
 * difference of each pair. */
Chris@82 299 {
Chris@82 300 E T1, T2, TH, TI;
Chris@82 301 T1 = Rp[0];
Chris@82 302 T2 = Rm[WS(rs, 4)];
Chris@82 303 T3 = T1 + T2;
Chris@82 304 T18 = T1 - T2;
Chris@82 305 TH = Ip[0];
Chris@82 306 TI = Im[WS(rs, 4)];
Chris@82 307 TJ = TH - TI;
Chris@82 308 T1i = TH + TI;
Chris@82 309 }
/* Load the remaining four Rp/Rm pairs; form their sums and differences. */
Chris@82 310 {
Chris@82 311 E T6, T19, Tg, T1d, T9, T1a, Td, T1c;
Chris@82 312 {
Chris@82 313 E T4, T5, Te, Tf;
Chris@82 314 T4 = Rp[WS(rs, 2)];
Chris@82 315 T5 = Rm[WS(rs, 2)];
Chris@82 316 T6 = T4 + T5;
Chris@82 317 T19 = T4 - T5;
Chris@82 318 Te = Rm[WS(rs, 3)];
Chris@82 319 Tf = Rp[WS(rs, 1)];
Chris@82 320 Tg = Te + Tf;
Chris@82 321 T1d = Te - Tf;
Chris@82 322 }
Chris@82 323 {
Chris@82 324 E T7, T8, Tb, Tc;
Chris@82 325 T7 = Rm[WS(rs, 1)];
Chris@82 326 T8 = Rp[WS(rs, 3)];
Chris@82 327 T9 = T7 + T8;
Chris@82 328 T1a = T7 - T8;
Chris@82 329 Tb = Rp[WS(rs, 4)];
Chris@82 330 Tc = Rm[0];
Chris@82 331 Td = Tb + Tc;
Chris@82 332 T1c = Tb - Tc;
Chris@82 333 }
Chris@82 334 TE = T6 - T9;
Chris@82 335 TF = Td - Tg;
Chris@82 336 T1B = T1c - T1d;
Chris@82 337 T1A = T19 - T1a;
Chris@82 338 {
Chris@82 339 E T1b, T1e, Ta, Th;
Chris@82 340 T1b = T19 + T1a;
Chris@82 341 T1e = T1c + T1d;
Chris@82 342 T1f = T1b + T1e;
Chris@82 343 T1t = KP559016994 * (T1b - T1e);
Chris@82 344 Ta = T6 + T9;
Chris@82 345 Th = Td + Tg;
Chris@82 346 Ti = Ta + Th;
Chris@82 347 Tl = KP559016994 * (Ta - Th);
Chris@82 348 }
Chris@82 349 }
/* Load the remaining four Ip/Im pairs; form their differences
 * (Tp, Tz, Ts, Tw) and sums (T1j, T1n, T1k, T1m), then combine them. */
Chris@82 350 {
Chris@82 351 E Tp, T1j, Tz, T1n, Ts, T1k, Tw, T1m;
Chris@82 352 {
Chris@82 353 E Tn, To, Tx, Ty;
Chris@82 354 Tn = Ip[WS(rs, 2)];
Chris@82 355 To = Im[WS(rs, 2)];
Chris@82 356 Tp = Tn - To;
Chris@82 357 T1j = Tn + To;
Chris@82 358 Tx = Ip[WS(rs, 1)];
Chris@82 359 Ty = Im[WS(rs, 3)];
Chris@82 360 Tz = Tx - Ty;
Chris@82 361 T1n = Tx + Ty;
Chris@82 362 }
Chris@82 363 {
Chris@82 364 E Tq, Tr, Tu, Tv;
Chris@82 365 Tq = Ip[WS(rs, 3)];
Chris@82 366 Tr = Im[WS(rs, 1)];
Chris@82 367 Ts = Tq - Tr;
Chris@82 368 T1k = Tq + Tr;
Chris@82 369 Tu = Ip[WS(rs, 4)];
Chris@82 370 Tv = Im[0];
Chris@82 371 Tw = Tu - Tv;
Chris@82 372 T1m = Tu + Tv;
Chris@82 373 }
Chris@82 374 Tt = Tp - Ts;
Chris@82 375 TA = Tw - Tz;
Chris@82 376 T1w = T1m + T1n;
Chris@82 377 T1v = T1j + T1k;
Chris@82 378 {
Chris@82 379 E T1l, T1o, TK, TL;
Chris@82 380 T1l = T1j - T1k;
Chris@82 381 T1o = T1m - T1n;
Chris@82 382 T1p = T1l + T1o;
Chris@82 383 T1E = KP559016994 * (T1l - T1o);
Chris@82 384 TK = Tp + Ts;
Chris@82 385 TL = Tw + Tz;
Chris@82 386 TM = TK + TL;
Chris@82 387 TO = KP559016994 * (TK - TL);
Chris@82 388 }
Chris@82 389 }
/* First stores of the iteration: the only outputs written without W. */
Chris@82 390 Rp[0] = T3 + Ti;
Chris@82 391 Rm[0] = TJ + TM;
/* Output pair Ip/Im[WS(rs, 2)], combined with W[8] and W[9]. */
Chris@82 392 {
Chris@82 393 E T1g, T1q, T17, T1h;
Chris@82 394 T1g = T18 + T1f;
Chris@82 395 T1q = T1i + T1p;
Chris@82 396 T17 = W[8];
Chris@82 397 T1h = W[9];
Chris@82 398 Ip[WS(rs, 2)] = FNMS(T1h, T1q, T17 * T1g);
Chris@82 399 Im[WS(rs, 2)] = FMA(T1h, T1g, T17 * T1q);
Chris@82 400 }
/* Output pairs Rp/Rm[WS(rs, 1)] .. [WS(rs, 4)], combined with
 * W[2], W[3], W[6], W[7], W[10], W[11], W[14] and W[15]. */
Chris@82 401 {
Chris@82 402 E TB, TG, T11, TX, TP, T10, Tm, TW, TN, Tk;
Chris@82 403 TB = FNMS(KP951056516, TA, KP587785252 * Tt);
Chris@82 404 TG = FNMS(KP951056516, TF, KP587785252 * TE);
Chris@82 405 T11 = FMA(KP951056516, TE, KP587785252 * TF);
Chris@82 406 TX = FMA(KP951056516, Tt, KP587785252 * TA);
Chris@82 407 TN = FNMS(KP250000000, TM, TJ);
Chris@82 408 TP = TN - TO;
Chris@82 409 T10 = TO + TN;
Chris@82 410 Tk = FNMS(KP250000000, Ti, T3);
Chris@82 411 Tm = Tk - Tl;
Chris@82 412 TW = Tl + Tk;
Chris@82 413 {
Chris@82 414 E TC, TQ, Tj, TD;
Chris@82 415 TC = Tm - TB;
Chris@82 416 TQ = TG + TP;
Chris@82 417 Tj = W[2];
Chris@82 418 TD = W[3];
Chris@82 419 Rp[WS(rs, 1)] = FNMS(TD, TQ, Tj * TC);
Chris@82 420 Rm[WS(rs, 1)] = FMA(TD, TC, Tj * TQ);
Chris@82 421 }
Chris@82 422 {
Chris@82 423 E T14, T16, T13, T15;
Chris@82 424 T14 = TW - TX;
Chris@82 425 T16 = T11 + T10;
Chris@82 426 T13 = W[10];
Chris@82 427 T15 = W[11];
Chris@82 428 Rp[WS(rs, 3)] = FNMS(T15, T16, T13 * T14);
Chris@82 429 Rm[WS(rs, 3)] = FMA(T15, T14, T13 * T16);
Chris@82 430 }
Chris@82 431 {
Chris@82 432 E TS, TU, TR, TT;
Chris@82 433 TS = Tm + TB;
Chris@82 434 TU = TP - TG;
Chris@82 435 TR = W[14];
Chris@82 436 TT = W[15];
Chris@82 437 Rp[WS(rs, 4)] = FNMS(TT, TU, TR * TS);
Chris@82 438 Rm[WS(rs, 4)] = FMA(TT, TS, TR * TU);
Chris@82 439 }
Chris@82 440 {
Chris@82 441 E TY, T12, TV, TZ;
Chris@82 442 TY = TW + TX;
Chris@82 443 T12 = T10 - T11;
Chris@82 444 TV = W[6];
Chris@82 445 TZ = W[7];
Chris@82 446 Rp[WS(rs, 2)] = FNMS(TZ, T12, TV * TY);
Chris@82 447 Rm[WS(rs, 2)] = FMA(TZ, TY, TV * T12);
Chris@82 448 }
Chris@82 449 }
/* Output pairs Ip/Im[0], [WS(rs, 1)], [WS(rs, 3)] and [WS(rs, 4)], combined
 * with W[0], W[1], W[4], W[5], W[12], W[13], W[16] and W[17]. */
Chris@82 450 {
Chris@82 451 E T1x, T1C, T1Q, T1N, T1F, T1R, T1u, T1M, T1D, T1s;
Chris@82 452 T1x = FNMS(KP951056516, T1w, KP587785252 * T1v);
Chris@82 453 T1C = FNMS(KP951056516, T1B, KP587785252 * T1A);
Chris@82 454 T1Q = FMA(KP951056516, T1A, KP587785252 * T1B);
Chris@82 455 T1N = FMA(KP951056516, T1v, KP587785252 * T1w);
Chris@82 456 T1D = FNMS(KP250000000, T1p, T1i);
Chris@82 457 T1F = T1D - T1E;
Chris@82 458 T1R = T1E + T1D;
Chris@82 459 T1s = FNMS(KP250000000, T1f, T18);
Chris@82 460 T1u = T1s - T1t;
Chris@82 461 T1M = T1t + T1s;
Chris@82 462 {
Chris@82 463 E T1y, T1G, T1r, T1z;
Chris@82 464 T1y = T1u - T1x;
Chris@82 465 T1G = T1C + T1F;
Chris@82 466 T1r = W[12];
Chris@82 467 T1z = W[13];
Chris@82 468 Ip[WS(rs, 3)] = FNMS(T1z, T1G, T1r * T1y);
Chris@82 469 Im[WS(rs, 3)] = FMA(T1r, T1G, T1z * T1y);
Chris@82 470 }
Chris@82 471 {
Chris@82 472 E T1U, T1W, T1T, T1V;
Chris@82 473 T1U = T1M + T1N;
Chris@82 474 T1W = T1R - T1Q;
Chris@82 475 T1T = W[16];
Chris@82 476 T1V = W[17];
Chris@82 477 Ip[WS(rs, 4)] = FNMS(T1V, T1W, T1T * T1U);
Chris@82 478 Im[WS(rs, 4)] = FMA(T1T, T1W, T1V * T1U);
Chris@82 479 }
Chris@82 480 {
Chris@82 481 E T1I, T1K, T1H, T1J;
Chris@82 482 T1I = T1u + T1x;
Chris@82 483 T1K = T1F - T1C;
Chris@82 484 T1H = W[4];
Chris@82 485 T1J = W[5];
Chris@82 486 Ip[WS(rs, 1)] = FNMS(T1J, T1K, T1H * T1I);
Chris@82 487 Im[WS(rs, 1)] = FMA(T1H, T1K, T1J * T1I);
Chris@82 488 }
Chris@82 489 {
Chris@82 490 E T1O, T1S, T1L, T1P;
Chris@82 491 T1O = T1M - T1N;
Chris@82 492 T1S = T1Q + T1R;
Chris@82 493 T1L = W[0];
Chris@82 494 T1P = W[1];
Chris@82 495 Ip[0] = FNMS(T1P, T1S, T1L * T1O);
Chris@82 496 Im[0] = FMA(T1L, T1S, T1P * T1O);
Chris@82 497 }
Chris@82 498 }
Chris@82 499 }
Chris@82 500 }
Chris@82 501 }
Chris@82 502
/*
 * Twiddle instruction table referenced by the codelet descriptor: one
 * {TW_FULL, 1, 10} entry followed by a {TW_NEXT, 1, 0} entry.
 * NOTE(review): presumably TW_FULL requests the full twiddle set for size
 * 10 and TW_NEXT ends the list -- confirm against the tw_instr definition,
 * which is not visible in this file.
 */
Chris@82 503 static const tw_instr twinstr[] = {
Chris@82 504 {TW_FULL, 1, 10},
Chris@82 505 {TW_NEXT, 1, 0}
Chris@82 506 };
Chris@82 507
/*
 * Codelet descriptor for the non-FMA variant: the literal 10, the name
 * string "hc2cb_10", the twinstr table, &GENUS, and the counts
 * {72, 30, 30, 0}.  The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in this variant's
 * generator statistics comment.
 * NOTE(review): the leading 10 is presumably the radix and the trailing 0
 * presumably an "other operations" count -- confirm against the hc2c_desc
 * definition, which is not visible in this file.
 */
Chris@82 508 static const hc2c_desc desc = { 10, "hc2cb_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@82 509
/*
 * Entry point for the non-FMA variant: passes the planner p, the hc2cb_10
 * function, its descriptor and HC2C_VIA_RDFT to X(khc2c_register).
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * confirm in the included headers.
 */
Chris@82 510 void X(codelet_hc2cb_10) (planner *p) {
Chris@82 511 X(khc2c_register) (p, hc2cb_10, &desc, HC2C_VIA_RDFT);
Chris@82 512 }
Chris@82 513 #endif