annotate src/fftw-3.3.5/dft/scalar/codelets/t2_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:37:28 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 10 -name t2_10 -include t.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 114 FP additions, 94 FP multiplications,
Chris@42 32 * (or, 48 additions, 28 multiplications, 66 fused multiply/add),
Chris@42 33 * 85 stack variables, 4 constants, and 40 memory accesses
Chris@42 34 */
Chris@42 35 #include "t.h"
Chris@42 36
/*
 * t2_10, HAVE_FMA variant: the size-10 twiddle codelet produced by genfft's
 * gen_twiddle (see the "Generated by" comment above: -n 10 -twiddle-log3).
 *
 * For each m in [mb, me) the loop body works in place on the ten points
 * ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..9: all ten are read and all ten are
 * overwritten.  After every iteration ri and ii advance by ms and W by 6.
 *
 *   ri, ii : data arrays, read and written (presumably the real and
 *            imaginary parts -- confirm against callers).
 *   W      : table of multipliers; six values W[0..5] are consumed per
 *            iteration, starting at W + mb * 6.
 *   rs     : stride handed to WS() to locate point k.
 *   mb, me : half-open iteration range.
 *   ms     : per-iteration increment applied to ri and ii.
 *
 * Only three multiplier pairs are loaded from W; the pairs applied to the
 * remaining points are built from them by products.  Point 0 is used
 * unscaled.
 *
 * NOTE(review): the inline notes assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; both macros, as well as DK, E, R, INT, WS and
 * MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm there.
 */
Chris@42 37 static void t2_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5)-1)/2 */
Chris@42 43 {
Chris@42 44 INT m;
Chris@42 45 for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@42 46 E T27, T2b, T2a, T2c; /* kept in the outer scope: consumed by the last four ii stores at the end of the loop body */
Chris@42 47 {
Chris@42 48 E T2, T3, T8, Tc, T5, T4, TX, T11, TE, T6, TB, TA;
Chris@42 49 T2 = W[0]; /* (T2, T5) = (W[0], W[1]): pair applied to point 1 */
Chris@42 50 T3 = W[2]; /* (T3, T6) = (W[2], W[3]): pair applied to point 3 */
Chris@42 51 T8 = W[4]; /* (T8, Tc) = (W[4], W[5]): pair applied to point 9 */
Chris@42 52 Tc = W[5];
Chris@42 53 T5 = W[1];
Chris@42 54 T4 = T2 * T3;
Chris@42 55 TX = T3 * T8;
Chris@42 56 TA = T2 * T8;
Chris@42 57 T11 = T3 * Tc;
Chris@42 58 TE = T2 * Tc;
Chris@42 59 T6 = W[3];
Chris@42 60 TB = FMA(T5, Tc, TA); /* (TB, TF): pair applied to point 8 */
Chris@42 61 {
Chris@42 62 E T2d, T24, T1c, Tk, T1i, T28, T2l, T1a, T2f, T1I, T1R, T1Z, TL, T1v, T1d;
Chris@42 63 E Tz, T1S, T1r, TH, T1t;
Chris@42 64 {
Chris@42 65 E T1, TF, TY, T12, Tl, T7, T23, To, Tb, Te, Ti, Th, Td, Tw, Ts;
Chris@42 66 E Ta;
Chris@42 67 T1 = ri[0];
Chris@42 68 TF = FNMS(T5, T8, TE);
Chris@42 69 TY = FMA(T6, Tc, TX); /* (TY, T12): pair applied to point 6 */
Chris@42 70 T12 = FNMS(T6, T8, T11);
Chris@42 71 Tl = FMA(T5, T6, T4); /* (Tl, To): pair applied to point 2 */
Chris@42 72 T7 = FNMS(T5, T6, T4); /* (T7, Tb): pair applied to point 4 */
Chris@42 73 Ta = T2 * T6;
Chris@42 74 T23 = ii[0];
Chris@42 75 {
Chris@42 76 E Tg, T9, Tv, Tr;
Chris@42 77 Tg = T7 * Tc;
Chris@42 78 T9 = T7 * T8;
Chris@42 79 Tv = Tl * Tc;
Chris@42 80 Tr = Tl * T8;
Chris@42 81 To = FNMS(T5, T3, Ta);
Chris@42 82 Tb = FMA(T5, T3, Ta);
Chris@42 83 Te = ri[WS(rs, 5)];
Chris@42 84 Ti = ii[WS(rs, 5)];
Chris@42 85 Th = FNMS(Tb, T8, Tg); /* (Td, Th): pair applied to point 5 */
Chris@42 86 Td = FMA(Tb, Tc, T9);
Chris@42 87 Tw = FNMS(To, T8, Tv); /* (Ts, Tw): pair applied to point 7 */
Chris@42 88 Ts = FMA(To, Tc, Tr);
Chris@42 89 }
Chris@42 90 {
Chris@42 91 E T18, T1G, T1g, TW, T1P, T1C, T14, T1E;
Chris@42 92 {
Chris@42 93 E TR, T1z, TV, T1B, TZ, T13, T15, T17, T10, T1D;
Chris@42 94 {
Chris@42 95 E TO, TQ, TP, T22, Tj, T1y, T21, Tf;
Chris@42 96 TO = ri[WS(rs, 4)];
Chris@42 97 T21 = Td * Ti;
Chris@42 98 Tf = Td * Te;
Chris@42 99 TQ = ii[WS(rs, 4)];
Chris@42 100 TP = T7 * TO;
Chris@42 101 T22 = FNMS(Th, Te, T21); /* (Tj, T22): scaled point 5 */
Chris@42 102 Tj = FMA(Th, Ti, Tf);
Chris@42 103 T1y = T7 * TQ;
Chris@42 104 TR = FMA(Tb, TQ, TP); /* (TR, T1z): scaled point 4 */
Chris@42 105 T2d = T23 - T22; /* point 0 combined with scaled point 5: difference and sum */
Chris@42 106 T24 = T22 + T23;
Chris@42 107 T1c = T1 + Tj;
Chris@42 108 Tk = T1 - Tj;
Chris@42 109 T1z = FNMS(Tb, TO, T1y);
Chris@42 110 }
Chris@42 111 T15 = ri[WS(rs, 1)];
Chris@42 112 T17 = ii[WS(rs, 1)];
Chris@42 113 {
Chris@42 114 E TS, TU, T16, T1F, TT, T1A;
Chris@42 115 TS = ri[WS(rs, 9)];
Chris@42 116 TU = ii[WS(rs, 9)];
Chris@42 117 T16 = T2 * T15;
Chris@42 118 T1F = T2 * T17;
Chris@42 119 TT = T8 * TS;
Chris@42 120 T1A = T8 * TU;
Chris@42 121 T18 = FMA(T5, T17, T16); /* (T18, T1G): scaled point 1 */
Chris@42 122 T1G = FNMS(T5, T15, T1F);
Chris@42 123 TV = FMA(Tc, TU, TT); /* (TV, T1B): scaled point 9 */
Chris@42 124 T1B = FNMS(Tc, TS, T1A);
Chris@42 125 }
Chris@42 126 TZ = ri[WS(rs, 6)];
Chris@42 127 T13 = ii[WS(rs, 6)];
Chris@42 128 T1g = TR + TV;
Chris@42 129 TW = TR - TV;
Chris@42 130 T1P = T1z + T1B;
Chris@42 131 T1C = T1z - T1B;
Chris@42 132 T10 = TY * TZ;
Chris@42 133 T1D = TY * T13;
Chris@42 134 T14 = FMA(T12, T13, T10); /* (T14, T1E): scaled point 6 */
Chris@42 135 T1E = FNMS(T12, TZ, T1D);
Chris@42 136 }
Chris@42 137 {
Chris@42 138 E Tq, T1o, Ty, TC, TG, T1q, TD, T1s;
Chris@42 139 {
Chris@42 140 E TI, TK, Tt, T1p;
Chris@42 141 {
Chris@42 142 E Tm, T1n, Tp, Tn;
Chris@42 143 Tm = ri[WS(rs, 2)];
Chris@42 144 Tp = ii[WS(rs, 2)];
Chris@42 145 {
Chris@42 146 E T19, T1h, T1Q, T1H;
Chris@42 147 T19 = T14 - T18;
Chris@42 148 T1h = T14 + T18;
Chris@42 149 T1Q = T1E + T1G;
Chris@42 150 T1H = T1E - T1G;
Chris@42 151 Tn = Tl * Tm;
Chris@42 152 T1i = T1g + T1h;
Chris@42 153 T28 = T1g - T1h;
Chris@42 154 T2l = TW - T19;
Chris@42 155 T1a = TW + T19;
Chris@42 156 T2f = T1C + T1H;
Chris@42 157 T1I = T1C - T1H;
Chris@42 158 T1R = T1P - T1Q;
Chris@42 159 T1Z = T1P + T1Q;
Chris@42 160 T1n = Tl * Tp;
Chris@42 161 }
Chris@42 162 Tq = FMA(To, Tp, Tn); /* (Tq, T1o): scaled point 2 */
Chris@42 163 TI = ri[WS(rs, 3)];
Chris@42 164 TK = ii[WS(rs, 3)];
Chris@42 165 T1o = FNMS(To, Tm, T1n);
Chris@42 166 }
Chris@42 167 {
Chris@42 168 E Tx, Tu, TJ, T1u;
Chris@42 169 Tt = ri[WS(rs, 7)];
Chris@42 170 TJ = T3 * TI;
Chris@42 171 T1u = T3 * TK;
Chris@42 172 Tx = ii[WS(rs, 7)];
Chris@42 173 Tu = Ts * Tt;
Chris@42 174 TL = FMA(T6, TK, TJ); /* (TL, T1v): scaled point 3 */
Chris@42 175 T1v = FNMS(T6, TI, T1u);
Chris@42 176 T1p = Ts * Tx;
Chris@42 177 Ty = FMA(Tw, Tx, Tu); /* (Ty, T1q): scaled point 7 */
Chris@42 178 }
Chris@42 179 TC = ri[WS(rs, 8)];
Chris@42 180 TG = ii[WS(rs, 8)];
Chris@42 181 T1q = FNMS(Tw, Tt, T1p);
Chris@42 182 }
Chris@42 183 T1d = Tq + Ty;
Chris@42 184 Tz = Tq - Ty;
Chris@42 185 TD = TB * TC;
Chris@42 186 T1s = TB * TG;
Chris@42 187 T1S = T1o + T1q;
Chris@42 188 T1r = T1o - T1q;
Chris@42 189 TH = FMA(TF, TG, TD); /* (TH, T1t): scaled point 8 */
Chris@42 190 T1t = FNMS(TF, TC, T1s);
Chris@42 191 }
Chris@42 192 }
Chris@42 193 }
Chris@42 194 {
Chris@42 195 E T1f, T29, T1Y, T1U, T2j, T2n, T2m, T2o;
Chris@42 196 {
Chris@42 197 E T2k, T2e, T1l, T1L, T1J, T1k, T1b, T1e, TM;
Chris@42 198 T1e = TH + TL;
Chris@42 199 TM = TH - TL;
Chris@42 200 {
Chris@42 201 E T1w, T1T, TN, T1x;
Chris@42 202 T1w = T1t - T1v;
Chris@42 203 T1T = T1t + T1v;
Chris@42 204 T1f = T1d + T1e;
Chris@42 205 T29 = T1d - T1e;
Chris@42 206 T2k = Tz - TM;
Chris@42 207 TN = Tz + TM;
Chris@42 208 T1x = T1r - T1w;
Chris@42 209 T2e = T1r + T1w;
Chris@42 210 T1Y = T1S + T1T;
Chris@42 211 T1U = T1S - T1T;
Chris@42 212 T1l = TN - T1a;
Chris@42 213 T1b = TN + T1a;
Chris@42 214 T1L = FNMS(KP618033988, T1x, T1I);
Chris@42 215 T1J = FMA(KP618033988, T1I, T1x);
Chris@42 216 }
Chris@42 217 T1k = FNMS(KP250000000, T1b, Tk);
Chris@42 218 ri[WS(rs, 5)] = Tk + T1b; /* stores to ri at positions 5, 1, 9, 3, 7 follow */
Chris@42 219 {
Chris@42 220 E T2g, T2i, T2h, T1K, T1m;
Chris@42 221 T2g = T2e + T2f;
Chris@42 222 T2i = T2e - T2f;
Chris@42 223 T1K = FNMS(KP559016994, T1l, T1k);
Chris@42 224 T1m = FMA(KP559016994, T1l, T1k);
Chris@42 225 T2h = FNMS(KP250000000, T2g, T2d);
Chris@42 226 ri[WS(rs, 1)] = FMA(KP951056516, T1J, T1m);
Chris@42 227 ri[WS(rs, 9)] = FNMS(KP951056516, T1J, T1m);
Chris@42 228 ri[WS(rs, 3)] = FMA(KP951056516, T1L, T1K);
Chris@42 229 ri[WS(rs, 7)] = FNMS(KP951056516, T1L, T1K);
Chris@42 230 ii[WS(rs, 5)] = T2g + T2d;
Chris@42 231 T2j = FMA(KP559016994, T2i, T2h);
Chris@42 232 T2n = FNMS(KP559016994, T2i, T2h);
Chris@42 233 T2m = FMA(KP618033988, T2l, T2k);
Chris@42 234 T2o = FNMS(KP618033988, T2k, T2l);
Chris@42 235 }
Chris@42 236 }
Chris@42 237 {
Chris@42 238 E T1O, T1W, T1V, T1X, T1j, T1N, T1M, T20, T26, T25;
Chris@42 239 T1j = T1f + T1i;
Chris@42 240 T1N = T1f - T1i;
Chris@42 241 ii[WS(rs, 7)] = FMA(KP951056516, T2o, T2n); /* stores to ii at positions 7, 3, 9, 1 */
Chris@42 242 ii[WS(rs, 3)] = FNMS(KP951056516, T2o, T2n);
Chris@42 243 ii[WS(rs, 9)] = FMA(KP951056516, T2m, T2j);
Chris@42 244 ii[WS(rs, 1)] = FNMS(KP951056516, T2m, T2j);
Chris@42 245 T1M = FNMS(KP250000000, T1j, T1c);
Chris@42 246 ri[0] = T1c + T1j; /* stores to ri at positions 0, 6, 4, 8, 2 follow */
Chris@42 247 T1O = FNMS(KP559016994, T1N, T1M);
Chris@42 248 T1W = FMA(KP559016994, T1N, T1M);
Chris@42 249 T1V = FNMS(KP618033988, T1U, T1R);
Chris@42 250 T1X = FMA(KP618033988, T1R, T1U);
Chris@42 251 T20 = T1Y + T1Z;
Chris@42 252 T26 = T1Y - T1Z;
Chris@42 253 ri[WS(rs, 6)] = FMA(KP951056516, T1X, T1W);
Chris@42 254 ri[WS(rs, 4)] = FNMS(KP951056516, T1X, T1W);
Chris@42 255 ri[WS(rs, 8)] = FMA(KP951056516, T1V, T1O);
Chris@42 256 ri[WS(rs, 2)] = FNMS(KP951056516, T1V, T1O);
Chris@42 257 T25 = FNMS(KP250000000, T20, T24);
Chris@42 258 ii[0] = T20 + T24;
Chris@42 259 T27 = FNMS(KP559016994, T26, T25);
Chris@42 260 T2b = FMA(KP559016994, T26, T25);
Chris@42 261 T2a = FNMS(KP618033988, T29, T28);
Chris@42 262 T2c = FMA(KP618033988, T28, T29);
Chris@42 263 }
Chris@42 264 }
Chris@42 265 }
Chris@42 266 }
Chris@42 267 ii[WS(rs, 6)] = FNMS(KP951056516, T2c, T2b); /* final stores to ii at positions 6, 4, 8, 2 */
Chris@42 268 ii[WS(rs, 4)] = FMA(KP951056516, T2c, T2b);
Chris@42 269 ii[WS(rs, 8)] = FNMS(KP951056516, T2a, T27);
Chris@42 270 ii[WS(rs, 2)] = FMA(KP951056516, T2a, T27);
Chris@42 271 }
Chris@42 272 }
Chris@42 273 }
Chris@42 274
/*
 * Twiddle instruction table referenced by desc (HAVE_FMA branch): three
 * TW_CEXP entries whose last fields are 1, 3 and 9, closed by a TW_NEXT
 * entry.  Presumably these select the three multiplier pairs that t2_10
 * reads as W[0..5] on each iteration -- confirm against the tw_instr
 * definition, which is outside this file.
 */
Chris@42 275 static const tw_instr twinstr[] = {
Chris@42 276 {TW_CEXP, 0, 1},
Chris@42 277 {TW_CEXP, 0, 3},
Chris@42 278 {TW_CEXP, 0, 9},
Chris@42 279 {TW_NEXT, 1, 0}
Chris@42 280 };
Chris@42 281
/*
 * Codelet descriptor (HAVE_FMA branch): 10, the name "t2_10", the twiddle
 * table, &GENUS, and {48, 28, 66, 0}, which matches the "48 additions,
 * 28 multiplications, 66 fused multiply/add" figures in the generated
 * header comment.  The meaning of the three trailing zeros is defined by
 * ct_desc, outside this file.
 */
Chris@42 282 static const ct_desc desc = { 10, "t2_10", twinstr, &GENUS, {48, 28, 66, 0}, 0, 0, 0 };
Chris@42 283
/* Registration hook (HAVE_FMA branch): hands t2_10 and its descriptor to planner p through X(kdft_dit_register). */
Chris@42 284 void X(codelet_t2_10) (planner *p) {
Chris@42 285 X(kdft_dit_register) (p, t2_10, &desc);
Chris@42 286 }
Chris@42 287 #else /* HAVE_FMA */
Chris@42 288
Chris@42 289 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 10 -name t2_10 -include t.h */
Chris@42 290
Chris@42 291 /*
Chris@42 292 * This function contains 114 FP additions, 80 FP multiplications,
Chris@42 293 * (or, 76 additions, 42 multiplications, 38 fused multiply/add),
Chris@42 294 * 63 stack variables, 4 constants, and 40 memory accesses
Chris@42 295 */
Chris@42 296 #include "t.h"
Chris@42 297
/*
 * t2_10, non-FMA variant: the size-10 twiddle codelet produced by genfft's
 * gen_twiddle (see the "Generated by" comment above: -n 10 -twiddle-log3).
 * It is compiled when HAVE_FMA is not defined.
 *
 * For each m in [mb, me) the loop body works in place on the ten points
 * ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..9: all ten are read and all ten are
 * overwritten.  After every iteration ri and ii advance by ms and W by 6.
 *
 *   ri, ii : data arrays, read and written (presumably the real and
 *            imaginary parts -- confirm against callers).
 *   W      : table of multipliers; six values W[0..5] are consumed per
 *            iteration, starting at W + mb * 6.
 *   rs     : stride handed to WS() to locate point k.
 *   mb, me : half-open iteration range.
 *   ms     : per-iteration increment applied to ri and ii.
 *
 * Only three multiplier pairs are loaded from W; the pairs applied to the
 * remaining points are built from them by products.  Point 0 is used
 * unscaled.
 *
 * NOTE(review): the inline notes assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; both macros, as well as DK, E, R, INT, WS and
 * MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm there.
 */
Chris@42 298 static void t2_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 299 {
Chris@42 300 DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
Chris@42 301 DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@42 302 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 303 DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@42 304 {
Chris@42 305 INT m;
Chris@42 306 for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@42 307 E T2, T5, T3, T6, T8, Tm, Tc, Tk, T9, Td, Te, TM, TO, Tg, Tp;
Chris@42 308 E Tv, Tx, Tr;
Chris@42 309 {
Chris@42 310 E T4, Tb, T7, Ta;
Chris@42 311 T2 = W[0]; /* (T2, T5) = (W[0], W[1]): pair applied to point 1 */
Chris@42 312 T5 = W[1];
Chris@42 313 T3 = W[2]; /* (T3, T6) = (W[2], W[3]): pair applied to point 3 */
Chris@42 314 T6 = W[3];
Chris@42 315 T4 = T2 * T3;
Chris@42 316 Tb = T5 * T3;
Chris@42 317 T7 = T5 * T6;
Chris@42 318 Ta = T2 * T6;
Chris@42 319 T8 = T4 - T7; /* (T8, Tc): pair applied to point 4 */
Chris@42 320 Tm = Ta - Tb; /* (Tk, Tm): pair applied to point 2 */
Chris@42 321 Tc = Ta + Tb;
Chris@42 322 Tk = T4 + T7;
Chris@42 323 T9 = W[4]; /* (T9, Td) = (W[4], W[5]): pair applied to point 9 */
Chris@42 324 Td = W[5];
Chris@42 325 Te = FMA(T8, T9, Tc * Td); /* (Te, Tg): pair applied to point 5 */
Chris@42 326 TM = FMA(T3, T9, T6 * Td); /* (TM, TO): pair applied to point 6 */
Chris@42 327 TO = FNMS(T6, T9, T3 * Td);
Chris@42 328 Tg = FNMS(Tc, T9, T8 * Td);
Chris@42 329 Tp = FMA(Tk, T9, Tm * Td); /* (Tp, Tr): pair applied to point 7 */
Chris@42 330 Tv = FMA(T2, T9, T5 * Td); /* (Tv, Tx): pair applied to point 8 */
Chris@42 331 Tx = FNMS(T5, T9, T2 * Td);
Chris@42 332 Tr = FNMS(Tm, T9, Tk * Td);
Chris@42 333 }
Chris@42 334 {
Chris@42 335 E Tj, T1S, TX, T1G, TL, TU, TV, T1s, T1t, T1C, T11, T12, T13, T1h, T1k;
Chris@42 336 E T1Q, Tu, TD, TE, T1v, T1w, T1B, TY, TZ, T10, T1a, T1d, T1P;
Chris@42 337 {
Chris@42 338 E T1, T1F, Ti, T1E, Tf, Th;
Chris@42 339 T1 = ri[0];
Chris@42 340 T1F = ii[0];
Chris@42 341 Tf = ri[WS(rs, 5)];
Chris@42 342 Th = ii[WS(rs, 5)];
Chris@42 343 Ti = FMA(Te, Tf, Tg * Th); /* (Ti, T1E): scaled point 5 */
Chris@42 344 T1E = FNMS(Tg, Tf, Te * Th);
Chris@42 345 Tj = T1 - Ti; /* point 0 combined with scaled point 5: difference and sum */
Chris@42 346 T1S = T1F - T1E;
Chris@42 347 TX = T1 + Ti;
Chris@42 348 T1G = T1E + T1F;
Chris@42 349 }
Chris@42 350 {
Chris@42 351 E TH, T1f, TT, T1j, TK, T1g, TQ, T1i;
Chris@42 352 {
Chris@42 353 E TF, TG, TR, TS;
Chris@42 354 TF = ri[WS(rs, 4)];
Chris@42 355 TG = ii[WS(rs, 4)];
Chris@42 356 TH = FMA(T8, TF, Tc * TG); /* (TH, T1f): scaled point 4 */
Chris@42 357 T1f = FNMS(Tc, TF, T8 * TG);
Chris@42 358 TR = ri[WS(rs, 1)];
Chris@42 359 TS = ii[WS(rs, 1)];
Chris@42 360 TT = FMA(T2, TR, T5 * TS); /* (TT, T1j): scaled point 1 */
Chris@42 361 T1j = FNMS(T5, TR, T2 * TS);
Chris@42 362 }
Chris@42 363 {
Chris@42 364 E TI, TJ, TN, TP;
Chris@42 365 TI = ri[WS(rs, 9)];
Chris@42 366 TJ = ii[WS(rs, 9)];
Chris@42 367 TK = FMA(T9, TI, Td * TJ); /* (TK, T1g): scaled point 9 */
Chris@42 368 T1g = FNMS(Td, TI, T9 * TJ);
Chris@42 369 TN = ri[WS(rs, 6)];
Chris@42 370 TP = ii[WS(rs, 6)];
Chris@42 371 TQ = FMA(TM, TN, TO * TP); /* (TQ, T1i): scaled point 6 */
Chris@42 372 T1i = FNMS(TO, TN, TM * TP);
Chris@42 373 }
Chris@42 374 TL = TH - TK;
Chris@42 375 TU = TQ - TT;
Chris@42 376 TV = TL + TU;
Chris@42 377 T1s = T1f + T1g;
Chris@42 378 T1t = T1i + T1j;
Chris@42 379 T1C = T1s + T1t;
Chris@42 380 T11 = TH + TK;
Chris@42 381 T12 = TQ + TT;
Chris@42 382 T13 = T11 + T12;
Chris@42 383 T1h = T1f - T1g;
Chris@42 384 T1k = T1i - T1j;
Chris@42 385 T1Q = T1h + T1k;
Chris@42 386 }
Chris@42 387 {
Chris@42 388 E To, T18, TC, T1c, Tt, T19, Tz, T1b;
Chris@42 389 {
Chris@42 390 E Tl, Tn, TA, TB;
Chris@42 391 Tl = ri[WS(rs, 2)];
Chris@42 392 Tn = ii[WS(rs, 2)];
Chris@42 393 To = FMA(Tk, Tl, Tm * Tn); /* (To, T18): scaled point 2 */
Chris@42 394 T18 = FNMS(Tm, Tl, Tk * Tn);
Chris@42 395 TA = ri[WS(rs, 3)];
Chris@42 396 TB = ii[WS(rs, 3)];
Chris@42 397 TC = FMA(T3, TA, T6 * TB); /* (TC, T1c): scaled point 3 */
Chris@42 398 T1c = FNMS(T6, TA, T3 * TB);
Chris@42 399 }
Chris@42 400 {
Chris@42 401 E Tq, Ts, Tw, Ty;
Chris@42 402 Tq = ri[WS(rs, 7)];
Chris@42 403 Ts = ii[WS(rs, 7)];
Chris@42 404 Tt = FMA(Tp, Tq, Tr * Ts); /* (Tt, T19): scaled point 7 */
Chris@42 405 T19 = FNMS(Tr, Tq, Tp * Ts);
Chris@42 406 Tw = ri[WS(rs, 8)];
Chris@42 407 Ty = ii[WS(rs, 8)];
Chris@42 408 Tz = FMA(Tv, Tw, Tx * Ty); /* (Tz, T1b): scaled point 8 */
Chris@42 409 T1b = FNMS(Tx, Tw, Tv * Ty);
Chris@42 410 }
Chris@42 411 Tu = To - Tt;
Chris@42 412 TD = Tz - TC;
Chris@42 413 TE = Tu + TD;
Chris@42 414 T1v = T18 + T19;
Chris@42 415 T1w = T1b + T1c;
Chris@42 416 T1B = T1v + T1w;
Chris@42 417 TY = To + Tt;
Chris@42 418 TZ = Tz + TC;
Chris@42 419 T10 = TY + TZ;
Chris@42 420 T1a = T18 - T19;
Chris@42 421 T1d = T1b - T1c;
Chris@42 422 T1P = T1a + T1d;
Chris@42 423 }
Chris@42 424 { /* stores to ri at positions 5, 7, 3, 9, 1 */
Chris@42 425 E T15, TW, T16, T1m, T1o, T1e, T1l, T1n, T17;
Chris@42 426 T15 = KP559016994 * (TE - TV);
Chris@42 427 TW = TE + TV;
Chris@42 428 T16 = FNMS(KP250000000, TW, Tj);
Chris@42 429 T1e = T1a - T1d;
Chris@42 430 T1l = T1h - T1k;
Chris@42 431 T1m = FMA(KP951056516, T1e, KP587785252 * T1l);
Chris@42 432 T1o = FNMS(KP587785252, T1e, KP951056516 * T1l);
Chris@42 433 ri[WS(rs, 5)] = Tj + TW;
Chris@42 434 T1n = T16 - T15;
Chris@42 435 ri[WS(rs, 7)] = T1n - T1o;
Chris@42 436 ri[WS(rs, 3)] = T1n + T1o;
Chris@42 437 T17 = T15 + T16;
Chris@42 438 ri[WS(rs, 9)] = T17 - T1m;
Chris@42 439 ri[WS(rs, 1)] = T17 + T1m;
Chris@42 440 }
Chris@42 441 { /* stores to ii at positions 5, 3, 7, 1, 9 */
Chris@42 442 E T1R, T1T, T1U, T1Y, T20, T1W, T1X, T1Z, T1V;
Chris@42 443 T1R = KP559016994 * (T1P - T1Q);
Chris@42 444 T1T = T1P + T1Q;
Chris@42 445 T1U = FNMS(KP250000000, T1T, T1S);
Chris@42 446 T1W = Tu - TD;
Chris@42 447 T1X = TL - TU;
Chris@42 448 T1Y = FMA(KP951056516, T1W, KP587785252 * T1X);
Chris@42 449 T20 = FNMS(KP587785252, T1W, KP951056516 * T1X);
Chris@42 450 ii[WS(rs, 5)] = T1T + T1S;
Chris@42 451 T1Z = T1U - T1R;
Chris@42 452 ii[WS(rs, 3)] = T1Z - T20;
Chris@42 453 ii[WS(rs, 7)] = T20 + T1Z;
Chris@42 454 T1V = T1R + T1U;
Chris@42 455 ii[WS(rs, 1)] = T1V - T1Y;
Chris@42 456 ii[WS(rs, 9)] = T1Y + T1V;
Chris@42 457 }
Chris@42 458 { /* stores to ri at positions 0, 4, 6, 2, 8 */
Chris@42 459 E T1q, T14, T1p, T1y, T1A, T1u, T1x, T1z, T1r;
Chris@42 460 T1q = KP559016994 * (T10 - T13);
Chris@42 461 T14 = T10 + T13;
Chris@42 462 T1p = FNMS(KP250000000, T14, TX);
Chris@42 463 T1u = T1s - T1t;
Chris@42 464 T1x = T1v - T1w;
Chris@42 465 T1y = FNMS(KP587785252, T1x, KP951056516 * T1u);
Chris@42 466 T1A = FMA(KP951056516, T1x, KP587785252 * T1u);
Chris@42 467 ri[0] = TX + T14;
Chris@42 468 T1z = T1q + T1p;
Chris@42 469 ri[WS(rs, 4)] = T1z - T1A;
Chris@42 470 ri[WS(rs, 6)] = T1z + T1A;
Chris@42 471 T1r = T1p - T1q;
Chris@42 472 ri[WS(rs, 2)] = T1r - T1y;
Chris@42 473 ri[WS(rs, 8)] = T1r + T1y;
Chris@42 474 }
Chris@42 475 { /* stores to ii at positions 0, 4, 6, 2, 8 */
Chris@42 476 E T1L, T1D, T1K, T1J, T1N, T1H, T1I, T1O, T1M;
Chris@42 477 T1L = KP559016994 * (T1B - T1C);
Chris@42 478 T1D = T1B + T1C;
Chris@42 479 T1K = FNMS(KP250000000, T1D, T1G);
Chris@42 480 T1H = T11 - T12;
Chris@42 481 T1I = TY - TZ;
Chris@42 482 T1J = FNMS(KP587785252, T1I, KP951056516 * T1H);
Chris@42 483 T1N = FMA(KP951056516, T1I, KP587785252 * T1H);
Chris@42 484 ii[0] = T1D + T1G;
Chris@42 485 T1O = T1L + T1K;
Chris@42 486 ii[WS(rs, 4)] = T1N + T1O;
Chris@42 487 ii[WS(rs, 6)] = T1O - T1N;
Chris@42 488 T1M = T1K - T1L;
Chris@42 489 ii[WS(rs, 2)] = T1J + T1M;
Chris@42 490 ii[WS(rs, 8)] = T1M - T1J;
Chris@42 491 }
Chris@42 492 }
Chris@42 493 }
Chris@42 494 }
Chris@42 495 }
Chris@42 496
/*
 * Twiddle instruction table referenced by desc (non-FMA branch): three
 * TW_CEXP entries whose last fields are 1, 3 and 9, closed by a TW_NEXT
 * entry.  Presumably these select the three multiplier pairs that t2_10
 * reads as W[0..5] on each iteration -- confirm against the tw_instr
 * definition, which is outside this file.
 */
Chris@42 497 static const tw_instr twinstr[] = {
Chris@42 498 {TW_CEXP, 0, 1},
Chris@42 499 {TW_CEXP, 0, 3},
Chris@42 500 {TW_CEXP, 0, 9},
Chris@42 501 {TW_NEXT, 1, 0}
Chris@42 502 };
Chris@42 503
/*
 * Codelet descriptor (non-FMA branch): 10, the name "t2_10", the twiddle
 * table, &GENUS, and {76, 42, 38, 0}, which matches the "76 additions,
 * 42 multiplications, 38 fused multiply/add" figures in the generated
 * header comment.  The meaning of the three trailing zeros is defined by
 * ct_desc, outside this file.
 */
Chris@42 504 static const ct_desc desc = { 10, "t2_10", twinstr, &GENUS, {76, 42, 38, 0}, 0, 0, 0 };
Chris@42 505
/* Registration hook (non-FMA branch): hands t2_10 and its descriptor to planner p through X(kdft_dit_register). */
Chris@42 506 void X(codelet_t2_10) (planner *p) {
Chris@42 507 X(kdft_dit_register) (p, t2_10, &desc);
Chris@42 508 }
Chris@42 509 #endif /* HAVE_FMA */