annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cf_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:56 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cf_10 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 102 FP additions, 72 FP multiplications,
Chris@82 32 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
Chris@82 33 * 47 stack variables, 4 constants, and 40 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cf_10, FMA-oriented variant (this branch is compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).
 * Generated by genfft's gen_hc2c with "-n 10 -dit"; do not hand-edit the
 * arithmetic.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  data arrays, updated in place: each iteration reads
 *                   elements 0 and WS(rs, 1..4) of all four arrays (20
 *                   loads) and writes the same 20 locations.
 *   W               twiddle table; 18 values (W[0]..W[17]) are consumed per
 *                   iteration, and the table is first advanced by
 *                   (mb - 1) * 18.
 *   rs              stride, only ever used through WS(rs, k).
 *   mb, me          the loop runs for m = mb .. me - 1.
 *   ms              per-iteration pointer step: Rp and Ip move forward by
 *                   ms, Rm and Im move backward by ms.
 *
 * NOTE(review): FMA, FNMS and FMS are macros defined outside this file; the
 * comments below do not rely on their exact definition.  Each "twiddled"
 * pair (x, y) is combined with two consecutive twiddle values via one FMA
 * and one FNMS, which looks like a complex multiply by a conjugated
 * twiddle -- confirm against the codelet headers.
 */
Chris@82 37 static void hc2cf_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants: ~sin(2*pi/5), ~sqrt(5)/4, ~(sqrt(5)-1)/2, and 1/4. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 42 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 43 {
Chris@82 44 INT m;
/* One butterfly per m; Rp/Ip walk forward while Rm/Im walk backward. */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
Chris@82 46 E T8, T26, T12, T1U, TM, TZ, T10, T1I, T1J, T24, T16, T17, T18, T1h, T1m;
Chris@82 47 E T1P, Tl, Ty, Tz, T1F, T1G, T23, T13, T14, T15, T1s, T1x, T1O;
/*
 * Load the untwiddled pair (Rp[0], Rm[0]) and the pair
 * (Ip[WS(rs,2)], Im[WS(rs,2)]) twiddled by W[8], W[9]; keep their
 * difference (T8, T26) and sum (T12, T1U).
 */
Chris@82 48 {
Chris@82 49 E T1, T1T, T3, T6, T4, T1R, T2, T7, T1S, T5;
Chris@82 50 T1 = Rp[0];
Chris@82 51 T1T = Rm[0];
Chris@82 52 T3 = Ip[WS(rs, 2)];
Chris@82 53 T6 = Im[WS(rs, 2)];
Chris@82 54 T2 = W[8];
Chris@82 55 T4 = T2 * T3;
Chris@82 56 T1R = T2 * T6;
Chris@82 57 T5 = W[9];
Chris@82 58 T7 = FMA(T5, T6, T4);
Chris@82 59 T1S = FNMS(T5, T3, T1R);
Chris@82 60 T8 = T1 - T7;
Chris@82 61 T26 = T1T - T1S;
Chris@82 62 T12 = T1 + T7;
Chris@82 63 T1U = T1S + T1T;
Chris@82 64 }
/*
 * Load and twiddle four pairs: (Rp,Rm)[WS(rs,2)] with W[6..7],
 * (Ip,Im)[0] with W[0..1], (Ip,Im)[WS(rs,4)] with W[16..17] and
 * (Rp,Rm)[WS(rs,3)] with W[10..11]; then form their pairwise sums and
 * differences (TM .. T1P).
 */
Chris@82 65 {
Chris@82 66 E TF, T1e, TY, T1l, TL, T1g, TS, T1j;
Chris@82 67 {
Chris@82 68 E TB, TE, TC, T1d, TA, TD;
Chris@82 69 TB = Rp[WS(rs, 2)];
Chris@82 70 TE = Rm[WS(rs, 2)];
Chris@82 71 TA = W[6];
Chris@82 72 TC = TA * TB;
Chris@82 73 T1d = TA * TE;
Chris@82 74 TD = W[7];
Chris@82 75 TF = FMA(TD, TE, TC);
Chris@82 76 T1e = FNMS(TD, TB, T1d);
Chris@82 77 }
Chris@82 78 {
Chris@82 79 E TU, TX, TV, T1k, TT, TW;
Chris@82 80 TU = Ip[0];
Chris@82 81 TX = Im[0];
Chris@82 82 TT = W[0];
Chris@82 83 TV = TT * TU;
Chris@82 84 T1k = TT * TX;
Chris@82 85 TW = W[1];
Chris@82 86 TY = FMA(TW, TX, TV);
Chris@82 87 T1l = FNMS(TW, TU, T1k);
Chris@82 88 }
Chris@82 89 {
Chris@82 90 E TH, TK, TI, T1f, TG, TJ;
Chris@82 91 TH = Ip[WS(rs, 4)];
Chris@82 92 TK = Im[WS(rs, 4)];
Chris@82 93 TG = W[16];
Chris@82 94 TI = TG * TH;
Chris@82 95 T1f = TG * TK;
Chris@82 96 TJ = W[17];
Chris@82 97 TL = FMA(TJ, TK, TI);
Chris@82 98 T1g = FNMS(TJ, TH, T1f);
Chris@82 99 }
Chris@82 100 {
Chris@82 101 E TO, TR, TP, T1i, TN, TQ;
Chris@82 102 TO = Rp[WS(rs, 3)];
Chris@82 103 TR = Rm[WS(rs, 3)];
Chris@82 104 TN = W[10];
Chris@82 105 TP = TN * TO;
Chris@82 106 T1i = TN * TR;
Chris@82 107 TQ = W[11];
Chris@82 108 TS = FMA(TQ, TR, TP);
Chris@82 109 T1j = FNMS(TQ, TO, T1i);
Chris@82 110 }
Chris@82 111 TM = TF - TL;
Chris@82 112 TZ = TS - TY;
Chris@82 113 T10 = TM + TZ;
Chris@82 114 T1I = T1l - T1j;
Chris@82 115 T1J = T1g - T1e;
Chris@82 116 T24 = T1J + T1I;
Chris@82 117 T16 = TF + TL;
Chris@82 118 T17 = TS + TY;
Chris@82 119 T18 = T16 + T17;
Chris@82 120 T1h = T1e + T1g;
Chris@82 121 T1m = T1j + T1l;
Chris@82 122 T1P = T1h + T1m;
Chris@82 123 }
/*
 * Load and twiddle the remaining four pairs: (Rp,Rm)[WS(rs,1)] with
 * W[2..3], (Ip,Im)[WS(rs,1)] with W[4..5], (Ip,Im)[WS(rs,3)] with
 * W[12..13] and (Rp,Rm)[WS(rs,4)] with W[14..15]; then form their
 * pairwise sums and differences (Tl .. T1O).
 */
Chris@82 124 {
Chris@82 125 E Te, T1p, Tx, T1w, Tk, T1r, Tr, T1u;
Chris@82 126 {
Chris@82 127 E Ta, Td, Tb, T1o, T9, Tc;
Chris@82 128 Ta = Rp[WS(rs, 1)];
Chris@82 129 Td = Rm[WS(rs, 1)];
Chris@82 130 T9 = W[2];
Chris@82 131 Tb = T9 * Ta;
Chris@82 132 T1o = T9 * Td;
Chris@82 133 Tc = W[3];
Chris@82 134 Te = FMA(Tc, Td, Tb);
Chris@82 135 T1p = FNMS(Tc, Ta, T1o);
Chris@82 136 }
Chris@82 137 {
Chris@82 138 E Tt, Tw, Tu, T1v, Ts, Tv;
Chris@82 139 Tt = Ip[WS(rs, 1)];
Chris@82 140 Tw = Im[WS(rs, 1)];
Chris@82 141 Ts = W[4];
Chris@82 142 Tu = Ts * Tt;
Chris@82 143 T1v = Ts * Tw;
Chris@82 144 Tv = W[5];
Chris@82 145 Tx = FMA(Tv, Tw, Tu);
Chris@82 146 T1w = FNMS(Tv, Tt, T1v);
Chris@82 147 }
Chris@82 148 {
Chris@82 149 E Tg, Tj, Th, T1q, Tf, Ti;
Chris@82 150 Tg = Ip[WS(rs, 3)];
Chris@82 151 Tj = Im[WS(rs, 3)];
Chris@82 152 Tf = W[12];
Chris@82 153 Th = Tf * Tg;
Chris@82 154 T1q = Tf * Tj;
Chris@82 155 Ti = W[13];
Chris@82 156 Tk = FMA(Ti, Tj, Th);
Chris@82 157 T1r = FNMS(Ti, Tg, T1q);
Chris@82 158 }
Chris@82 159 {
Chris@82 160 E Tn, Tq, To, T1t, Tm, Tp;
Chris@82 161 Tn = Rp[WS(rs, 4)];
Chris@82 162 Tq = Rm[WS(rs, 4)];
Chris@82 163 Tm = W[14];
Chris@82 164 To = Tm * Tn;
Chris@82 165 T1t = Tm * Tq;
Chris@82 166 Tp = W[15];
Chris@82 167 Tr = FMA(Tp, Tq, To);
Chris@82 168 T1u = FNMS(Tp, Tn, T1t);
Chris@82 169 }
Chris@82 170 Tl = Te - Tk;
Chris@82 171 Ty = Tr - Tx;
Chris@82 172 Tz = Tl + Ty;
Chris@82 173 T1F = T1w - T1u;
Chris@82 174 T1G = T1r - T1p;
Chris@82 175 T23 = T1G + T1F;
Chris@82 176 T13 = Te + Tk;
Chris@82 177 T14 = Tr + Tx;
Chris@82 178 T15 = T13 + T14;
Chris@82 179 T1s = T1p + T1r;
Chris@82 180 T1x = T1u + T1w;
Chris@82 181 T1O = T1s + T1x;
Chris@82 182 }
/*
 * Output group 1 (built from the difference terms T8, Tz, T10 and
 * T1F..T1J): writes Rm[WS(rs,4)], Rm[WS(rs,2)], Rp[WS(rs,3)], Rm[0]
 * and Rp[WS(rs,1)].
 */
Chris@82 183 {
Chris@82 184 E T1D, T11, T1C, T1L, T1N, T1H, T1K, T1M, T1E;
Chris@82 185 T1D = Tz - T10;
Chris@82 186 T11 = Tz + T10;
Chris@82 187 T1C = FNMS(KP250000000, T11, T8);
Chris@82 188 T1H = T1F - T1G;
Chris@82 189 T1K = T1I - T1J;
Chris@82 190 T1L = FMA(KP618033988, T1K, T1H);
Chris@82 191 T1N = FNMS(KP618033988, T1H, T1K);
Chris@82 192 Rm[WS(rs, 4)] = T8 + T11;
Chris@82 193 T1M = FNMS(KP559016994, T1D, T1C);
Chris@82 194 Rm[WS(rs, 2)] = FNMS(KP951056516, T1N, T1M);
Chris@82 195 Rp[WS(rs, 3)] = FMA(KP951056516, T1N, T1M);
Chris@82 196 T1E = FMA(KP559016994, T1D, T1C);
Chris@82 197 Rm[0] = FNMS(KP951056516, T1L, T1E);
Chris@82 198 Rp[WS(rs, 1)] = FMA(KP951056516, T1L, T1E);
Chris@82 199 }
/*
 * Output group 2 (built from T26, T23, T24 and Tl, Ty, TM, TZ): writes
 * Im[WS(rs,4)], Im[WS(rs,2)], Ip[WS(rs,3)], Im[0] and Ip[WS(rs,1)].
 */
Chris@82 200 {
Chris@82 201 E T28, T25, T27, T2c, T2e, T2a, T2b, T2d, T29;
Chris@82 202 T28 = T24 - T23;
Chris@82 203 T25 = T23 + T24;
Chris@82 204 T27 = FMA(KP250000000, T25, T26);
Chris@82 205 T2a = Ty - Tl;
Chris@82 206 T2b = TZ - TM;
Chris@82 207 T2c = FMA(KP618033988, T2b, T2a);
Chris@82 208 T2e = FNMS(KP618033988, T2a, T2b);
Chris@82 209 Im[WS(rs, 4)] = T25 - T26;
Chris@82 210 T2d = FNMS(KP559016994, T28, T27);
Chris@82 211 Im[WS(rs, 2)] = FMS(KP951056516, T2e, T2d);
Chris@82 212 Ip[WS(rs, 3)] = FMA(KP951056516, T2e, T2d);
Chris@82 213 T29 = FMA(KP559016994, T28, T27);
Chris@82 214 Im[0] = FMS(KP951056516, T2c, T29);
Chris@82 215 Ip[WS(rs, 1)] = FMA(KP951056516, T2c, T29);
Chris@82 216 }
/*
 * Output group 3 (built from the sum terms T12, T15, T18 and T1h, T1m,
 * T1s, T1x): writes Rp[0], Rp[WS(rs,4)], Rm[WS(rs,3)], Rp[WS(rs,2)]
 * and Rm[WS(rs,1)].
 */
Chris@82 217 {
Chris@82 218 E T1b, T19, T1a, T1z, T1B, T1n, T1y, T1A, T1c;
Chris@82 219 T1b = T15 - T18;
Chris@82 220 T19 = T15 + T18;
Chris@82 221 T1a = FNMS(KP250000000, T19, T12);
Chris@82 222 T1n = T1h - T1m;
Chris@82 223 T1y = T1s - T1x;
Chris@82 224 T1z = FNMS(KP618033988, T1y, T1n);
Chris@82 225 T1B = FMA(KP618033988, T1n, T1y);
Chris@82 226 Rp[0] = T12 + T19;
Chris@82 227 T1A = FMA(KP559016994, T1b, T1a);
Chris@82 228 Rp[WS(rs, 4)] = FNMS(KP951056516, T1B, T1A);
Chris@82 229 Rm[WS(rs, 3)] = FMA(KP951056516, T1B, T1A);
Chris@82 230 T1c = FNMS(KP559016994, T1b, T1a);
Chris@82 231 Rp[WS(rs, 2)] = FNMS(KP951056516, T1z, T1c);
Chris@82 232 Rm[WS(rs, 1)] = FMA(KP951056516, T1z, T1c);
Chris@82 233 }
/*
 * Output group 4 (built from T1U, T1O, T1P and T13, T14, T16, T17):
 * writes Ip[0], Im[WS(rs,3)], Ip[WS(rs,4)], Im[WS(rs,1)] and
 * Ip[WS(rs,2)].
 */
Chris@82 234 {
Chris@82 235 E T1W, T1Q, T1V, T20, T22, T1Y, T1Z, T21, T1X;
Chris@82 236 T1W = T1O - T1P;
Chris@82 237 T1Q = T1O + T1P;
Chris@82 238 T1V = FNMS(KP250000000, T1Q, T1U);
Chris@82 239 T1Y = T16 - T17;
Chris@82 240 T1Z = T13 - T14;
Chris@82 241 T20 = FNMS(KP618033988, T1Z, T1Y);
Chris@82 242 T22 = FMA(KP618033988, T1Y, T1Z);
Chris@82 243 Ip[0] = T1Q + T1U;
Chris@82 244 T21 = FMA(KP559016994, T1W, T1V);
Chris@82 245 Im[WS(rs, 3)] = FMS(KP951056516, T22, T21);
Chris@82 246 Ip[WS(rs, 4)] = FMA(KP951056516, T22, T21);
Chris@82 247 T1X = FNMS(KP559016994, T1W, T1V);
Chris@82 248 Im[WS(rs, 1)] = FMS(KP951056516, T20, T1X);
Chris@82 249 Ip[WS(rs, 2)] = FMA(KP951056516, T20, T1X);
Chris@82 250 }
Chris@82 251 }
Chris@82 252 }
Chris@82 253 }
Chris@82 254
/*
 * Twiddle instruction list referenced by desc below.
 * NOTE(review): presumably {TW_FULL, 1, 10} requests the full twiddle set
 * for size 10 (the 18 values per iteration that hc2cf_10 reads as
 * W[0]..W[17]) and {TW_NEXT, 1, 0} terminates the list -- confirm against
 * the tw_instr definition, which is not part of this file.
 */
Chris@82 255 static const tw_instr twinstr[] = {
Chris@82 256 {TW_FULL, 1, 10},
Chris@82 257 {TW_NEXT, 1, 0}
Chris@82 258 };
Chris@82 259
/*
 * Codelet descriptor for the FMA variant: size 10, name "hc2cf_10", the
 * twiddle instruction list, the genus, and the counts {48, 18, 54, 0},
 * which match the generator's tally for this variant (48 additions,
 * 18 multiplications, 54 fused multiply/adds); the meaning of the final 0
 * is not visible here.
 */
Chris@82 260 static const hc2c_desc desc = { 10, "hc2cf_10", twinstr, &GENUS, {48, 18, 54, 0} };
Chris@82 261
/*
 * Registration entry point: hands the FMA variant of hc2cf_10 and its
 * descriptor to the planner p via X(khc2c_register), tagged HC2C_VIA_RDFT.
 */
Chris@82 262 void X(codelet_hc2cf_10) (planner *p) {
Chris@82 263 X(khc2c_register) (p, hc2cf_10, &desc, HC2C_VIA_RDFT);
Chris@82 264 }
Chris@82 265 #else
Chris@82 266
Chris@82 267 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 10 -dit -name hc2cf_10 -include rdft/scalar/hc2cf.h */
Chris@82 268
Chris@82 269 /*
Chris@82 270 * This function contains 102 FP additions, 60 FP multiplications,
Chris@82 271 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 272 * 45 stack variables, 4 constants, and 40 memory accesses
Chris@82 273 */
Chris@82 274 #include "rdft/scalar/hc2cf.h"
Chris@82 275
/*
 * hc2cf_10, non-FMA variant (this branch is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 * Generated by genfft's gen_hc2c with "-n 10 -dit"; do not hand-edit the
 * arithmetic.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  data arrays, updated in place: each iteration reads
 *                   elements 0 and WS(rs, 1..4) of all four arrays (20
 *                   loads) and writes the same 20 locations.
 *   W               twiddle table; 18 values (W[0]..W[17]) are consumed per
 *                   iteration, and the table is first advanced by
 *                   (mb - 1) * 18.
 *   rs              stride, only ever used through WS(rs, k).
 *   mb, me          the loop runs for m = mb .. me - 1.
 *   ms              per-iteration pointer step: Rp and Ip move forward by
 *                   ms, Rm and Im move backward by ms.
 *
 * NOTE(review): FMA and FNMS are macros defined outside this file; the
 * comments below do not rely on their exact definition.  Each "twiddled"
 * pair (x, y) is combined with two consecutive twiddle values via one FMA
 * and one FNMS, which looks like a complex multiply by a conjugated
 * twiddle -- confirm against the codelet headers.
 */
Chris@82 276 static void hc2cf_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 277 {
/* Numeric constants: ~sin(pi/5), ~sin(2*pi/5), 1/4, and ~sqrt(5)/4. */
Chris@82 278 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 279 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 280 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 281 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 282 {
Chris@82 283 INT m;
/* One butterfly per m; Rp/Ip walk forward while Rm/Im walk backward. */
Chris@82 284 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
Chris@82 285 E T7, T1O, TT, T1C, TF, TQ, TR, T1r, T1s, T1L, TX, TY, TZ, T16, T19;
Chris@82 286 E T1y, Ti, Tt, Tu, T1o, T1p, T1M, TU, TV, TW, T1d, T1g, T1x;
/*
 * Load the untwiddled pair (Rp[0], Rm[0]) and the pair
 * (Ip[WS(rs,2)], Im[WS(rs,2)]) twiddled by W[8], W[9]; keep their
 * difference (T7, T1O) and sum (TT, T1C).
 */
Chris@82 287 {
Chris@82 288 E T1, T1B, T6, T1A;
Chris@82 289 T1 = Rp[0];
Chris@82 290 T1B = Rm[0];
Chris@82 291 {
Chris@82 292 E T3, T5, T2, T4;
Chris@82 293 T3 = Ip[WS(rs, 2)];
Chris@82 294 T5 = Im[WS(rs, 2)];
Chris@82 295 T2 = W[8];
Chris@82 296 T4 = W[9];
Chris@82 297 T6 = FMA(T2, T3, T4 * T5);
Chris@82 298 T1A = FNMS(T4, T3, T2 * T5);
Chris@82 299 }
Chris@82 300 T7 = T1 - T6;
Chris@82 301 T1O = T1B - T1A;
Chris@82 302 TT = T1 + T6;
Chris@82 303 T1C = T1A + T1B;
Chris@82 304 }
/*
 * Load and twiddle four pairs: (Rp,Rm)[WS(rs,2)] with W[6..7],
 * (Ip,Im)[0] with W[0..1], (Ip,Im)[WS(rs,4)] with W[16..17] and
 * (Rp,Rm)[WS(rs,3)] with W[10..11]; then form their pairwise sums and
 * differences (TF .. T1y).
 */
Chris@82 305 {
Chris@82 306 E Tz, T14, TP, T18, TE, T15, TK, T17;
Chris@82 307 {
Chris@82 308 E Tw, Ty, Tv, Tx;
Chris@82 309 Tw = Rp[WS(rs, 2)];
Chris@82 310 Ty = Rm[WS(rs, 2)];
Chris@82 311 Tv = W[6];
Chris@82 312 Tx = W[7];
Chris@82 313 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 314 T14 = FNMS(Tx, Tw, Tv * Ty);
Chris@82 315 }
Chris@82 316 {
Chris@82 317 E TM, TO, TL, TN;
Chris@82 318 TM = Ip[0];
Chris@82 319 TO = Im[0];
Chris@82 320 TL = W[0];
Chris@82 321 TN = W[1];
Chris@82 322 TP = FMA(TL, TM, TN * TO);
Chris@82 323 T18 = FNMS(TN, TM, TL * TO);
Chris@82 324 }
Chris@82 325 {
Chris@82 326 E TB, TD, TA, TC;
Chris@82 327 TB = Ip[WS(rs, 4)];
Chris@82 328 TD = Im[WS(rs, 4)];
Chris@82 329 TA = W[16];
Chris@82 330 TC = W[17];
Chris@82 331 TE = FMA(TA, TB, TC * TD);
Chris@82 332 T15 = FNMS(TC, TB, TA * TD);
Chris@82 333 }
Chris@82 334 {
Chris@82 335 E TH, TJ, TG, TI;
Chris@82 336 TH = Rp[WS(rs, 3)];
Chris@82 337 TJ = Rm[WS(rs, 3)];
Chris@82 338 TG = W[10];
Chris@82 339 TI = W[11];
Chris@82 340 TK = FMA(TG, TH, TI * TJ);
Chris@82 341 T17 = FNMS(TI, TH, TG * TJ);
Chris@82 342 }
Chris@82 343 TF = Tz - TE;
Chris@82 344 TQ = TK - TP;
Chris@82 345 TR = TF + TQ;
Chris@82 346 T1r = T14 - T15;
Chris@82 347 T1s = T18 - T17;
Chris@82 348 T1L = T1s - T1r;
Chris@82 349 TX = Tz + TE;
Chris@82 350 TY = TK + TP;
Chris@82 351 TZ = TX + TY;
Chris@82 352 T16 = T14 + T15;
Chris@82 353 T19 = T17 + T18;
Chris@82 354 T1y = T16 + T19;
Chris@82 355 }
/*
 * Load and twiddle the remaining four pairs: (Rp,Rm)[WS(rs,1)] with
 * W[2..3], (Ip,Im)[WS(rs,1)] with W[4..5], (Ip,Im)[WS(rs,3)] with
 * W[12..13] and (Rp,Rm)[WS(rs,4)] with W[14..15]; then form their
 * pairwise sums and differences (Ti .. T1x).
 */
Chris@82 356 {
Chris@82 357 E Tc, T1b, Ts, T1f, Th, T1c, Tn, T1e;
Chris@82 358 {
Chris@82 359 E T9, Tb, T8, Ta;
Chris@82 360 T9 = Rp[WS(rs, 1)];
Chris@82 361 Tb = Rm[WS(rs, 1)];
Chris@82 362 T8 = W[2];
Chris@82 363 Ta = W[3];
Chris@82 364 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 365 T1b = FNMS(Ta, T9, T8 * Tb);
Chris@82 366 }
Chris@82 367 {
Chris@82 368 E Tp, Tr, To, Tq;
Chris@82 369 Tp = Ip[WS(rs, 1)];
Chris@82 370 Tr = Im[WS(rs, 1)];
Chris@82 371 To = W[4];
Chris@82 372 Tq = W[5];
Chris@82 373 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 374 T1f = FNMS(Tq, Tp, To * Tr);
Chris@82 375 }
Chris@82 376 {
Chris@82 377 E Te, Tg, Td, Tf;
Chris@82 378 Te = Ip[WS(rs, 3)];
Chris@82 379 Tg = Im[WS(rs, 3)];
Chris@82 380 Td = W[12];
Chris@82 381 Tf = W[13];
Chris@82 382 Th = FMA(Td, Te, Tf * Tg);
Chris@82 383 T1c = FNMS(Tf, Te, Td * Tg);
Chris@82 384 }
Chris@82 385 {
Chris@82 386 E Tk, Tm, Tj, Tl;
Chris@82 387 Tk = Rp[WS(rs, 4)];
Chris@82 388 Tm = Rm[WS(rs, 4)];
Chris@82 389 Tj = W[14];
Chris@82 390 Tl = W[15];
Chris@82 391 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 392 T1e = FNMS(Tl, Tk, Tj * Tm);
Chris@82 393 }
Chris@82 394 Ti = Tc - Th;
Chris@82 395 Tt = Tn - Ts;
Chris@82 396 Tu = Ti + Tt;
Chris@82 397 T1o = T1b - T1c;
Chris@82 398 T1p = T1e - T1f;
Chris@82 399 T1M = T1o + T1p;
Chris@82 400 TU = Tc + Th;
Chris@82 401 TV = Tn + Ts;
Chris@82 402 TW = TU + TV;
Chris@82 403 T1d = T1b + T1c;
Chris@82 404 T1g = T1e + T1f;
Chris@82 405 T1x = T1d + T1g;
Chris@82 406 }
/*
 * Output group 1 (built from the difference terms T7, Tu, TR and
 * T1o..T1s): writes Rm[WS(rs,4)], Rm[WS(rs,2)], Rp[WS(rs,3)], Rm[0]
 * and Rp[WS(rs,1)].
 */
Chris@82 407 {
Chris@82 408 E T1l, TS, T1m, T1u, T1w, T1q, T1t, T1v, T1n;
Chris@82 409 T1l = KP559016994 * (Tu - TR);
Chris@82 410 TS = Tu + TR;
Chris@82 411 T1m = FNMS(KP250000000, TS, T7);
Chris@82 412 T1q = T1o - T1p;
Chris@82 413 T1t = T1r + T1s;
Chris@82 414 T1u = FMA(KP951056516, T1q, KP587785252 * T1t);
Chris@82 415 T1w = FNMS(KP587785252, T1q, KP951056516 * T1t);
Chris@82 416 Rm[WS(rs, 4)] = T7 + TS;
Chris@82 417 T1v = T1m - T1l;
Chris@82 418 Rm[WS(rs, 2)] = T1v - T1w;
Chris@82 419 Rp[WS(rs, 3)] = T1v + T1w;
Chris@82 420 T1n = T1l + T1m;
Chris@82 421 Rm[0] = T1n - T1u;
Chris@82 422 Rp[WS(rs, 1)] = T1n + T1u;
Chris@82 423 }
/*
 * Output group 2 (built from T1O, T1L, T1M and Ti, Tt, TF, TQ): writes
 * Im[WS(rs,4)], Im[WS(rs,2)], Ip[WS(rs,3)], Im[0] and Ip[WS(rs,1)].
 */
Chris@82 424 {
Chris@82 425 E T1S, T1N, T1T, T1R, T1V, T1P, T1Q, T1W, T1U;
Chris@82 426 T1S = KP559016994 * (T1M + T1L);
Chris@82 427 T1N = T1L - T1M;
Chris@82 428 T1T = FMA(KP250000000, T1N, T1O);
Chris@82 429 T1P = TQ - TF;
Chris@82 430 T1Q = Ti - Tt;
Chris@82 431 T1R = FNMS(KP951056516, T1Q, KP587785252 * T1P);
Chris@82 432 T1V = FMA(KP587785252, T1Q, KP951056516 * T1P);
Chris@82 433 Im[WS(rs, 4)] = T1N - T1O;
Chris@82 434 T1W = T1T - T1S;
Chris@82 435 Im[WS(rs, 2)] = T1V - T1W;
Chris@82 436 Ip[WS(rs, 3)] = T1V + T1W;
Chris@82 437 T1U = T1S + T1T;
Chris@82 438 Im[0] = T1R - T1U;
Chris@82 439 Ip[WS(rs, 1)] = T1R + T1U;
Chris@82 440 }
/*
 * Output group 3 (built from the sum terms TT, TW, TZ and T16, T19,
 * T1d, T1g): writes Rp[0], Rp[WS(rs,4)], Rm[WS(rs,3)], Rp[WS(rs,2)]
 * and Rm[WS(rs,1)].
 */
Chris@82 441 {
Chris@82 442 E T12, T10, T11, T1i, T1k, T1a, T1h, T1j, T13;
Chris@82 443 T12 = KP559016994 * (TW - TZ);
Chris@82 444 T10 = TW + TZ;
Chris@82 445 T11 = FNMS(KP250000000, T10, TT);
Chris@82 446 T1a = T16 - T19;
Chris@82 447 T1h = T1d - T1g;
Chris@82 448 T1i = FNMS(KP587785252, T1h, KP951056516 * T1a);
Chris@82 449 T1k = FMA(KP951056516, T1h, KP587785252 * T1a);
Chris@82 450 Rp[0] = TT + T10;
Chris@82 451 T1j = T12 + T11;
Chris@82 452 Rp[WS(rs, 4)] = T1j - T1k;
Chris@82 453 Rm[WS(rs, 3)] = T1j + T1k;
Chris@82 454 T13 = T11 - T12;
Chris@82 455 Rp[WS(rs, 2)] = T13 - T1i;
Chris@82 456 Rm[WS(rs, 1)] = T13 + T1i;
Chris@82 457 }
/*
 * Output group 4 (built from T1C, T1x, T1y and TU, TV, TX, TY): writes
 * Ip[0], Im[WS(rs,3)], Ip[WS(rs,4)], Im[WS(rs,1)] and Ip[WS(rs,2)].
 */
Chris@82 458 {
Chris@82 459 E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
Chris@82 460 T1H = KP559016994 * (T1x - T1y);
Chris@82 461 T1z = T1x + T1y;
Chris@82 462 T1G = FNMS(KP250000000, T1z, T1C);
Chris@82 463 T1D = TX - TY;
Chris@82 464 T1E = TU - TV;
Chris@82 465 T1F = FNMS(KP587785252, T1E, KP951056516 * T1D);
Chris@82 466 T1J = FMA(KP951056516, T1E, KP587785252 * T1D);
Chris@82 467 Ip[0] = T1z + T1C;
Chris@82 468 T1K = T1H + T1G;
Chris@82 469 Im[WS(rs, 3)] = T1J - T1K;
Chris@82 470 Ip[WS(rs, 4)] = T1J + T1K;
Chris@82 471 T1I = T1G - T1H;
Chris@82 472 Im[WS(rs, 1)] = T1F - T1I;
Chris@82 473 Ip[WS(rs, 2)] = T1F + T1I;
Chris@82 474 }
Chris@82 475 }
Chris@82 476 }
Chris@82 477 }
Chris@82 478
/*
 * Twiddle instruction list referenced by desc below.
 * NOTE(review): presumably {TW_FULL, 1, 10} requests the full twiddle set
 * for size 10 (the 18 values per iteration that hc2cf_10 reads as
 * W[0]..W[17]) and {TW_NEXT, 1, 0} terminates the list -- confirm against
 * the tw_instr definition, which is not part of this file.
 */
Chris@82 479 static const tw_instr twinstr[] = {
Chris@82 480 {TW_FULL, 1, 10},
Chris@82 481 {TW_NEXT, 1, 0}
Chris@82 482 };
Chris@82 483
/*
 * Codelet descriptor for the non-FMA variant: size 10, name "hc2cf_10",
 * the twiddle instruction list, the genus, and the counts {72, 30, 30, 0},
 * which match the generator's tally for this variant (72 additions,
 * 30 multiplications, 30 fused multiply/adds); the meaning of the final 0
 * is not visible here.
 */
Chris@82 484 static const hc2c_desc desc = { 10, "hc2cf_10", twinstr, &GENUS, {72, 30, 30, 0} };
Chris@82 485
/*
 * Registration entry point: hands the non-FMA variant of hc2cf_10 and its
 * descriptor to the planner p via X(khc2c_register), tagged HC2C_VIA_RDFT.
 */
Chris@82 486 void X(codelet_hc2cf_10) (planner *p) {
Chris@82 487 X(khc2c_register) (p, hc2cf_10, &desc, HC2C_VIA_RDFT);
Chris@82 488 }
Chris@82 489 #endif