annotate src/fftw-3.3.8/dft/scalar/codelets/t2_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:25 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 10 -name t2_10 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 114 FP additions, 94 FP multiplications,
Chris@82 32 * (or, 48 additions, 28 multiplications, 66 fused multiply/add),
Chris@82 33 * 63 stack variables, 4 constants, and 40 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t2_10 (FMA variant): in-place 10-point twiddle codelet, emitted by genfft's
 * gen_twiddle (see the "Generated by" comment above for the exact options).
 *
 * Parameters:
 *   ri, ii - real and imaginary data; element k is addressed as
 *            ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..9 and is overwritten
 *            with the result.
 *   W      - twiddle table; W[0]..W[5] are read on every iteration.
 *   rs     - stride handed to WS() to locate element k.
 *   mb, me - the loop runs for m = mb, mb+1, ..., me-1.
 *   ms     - amount added to ri and ii after each iteration.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are macros defined outside
 * this file; the comments below assume they evaluate a*b + c and c - a*b
 * respectively -- confirm in the codelet headers.
 *
 * Per iteration the code (1) expands the three stored twiddle pairs into the
 * pairs needed for elements 2, 4, 5, 6, 7 and 8, (2) applies a twiddle pair to
 * each of elements 1..9, (3) forms sum/difference pairs (0,5), (4,9), (6,1),
 * (2,7), (8,3), and (4) combines the differences into outputs 1, 3, 5, 7, 9
 * and the sums into outputs 0, 2, 4, 6, 8.
 */
Chris@82 37 static void t2_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Named constants; numerically sin(2*pi/5), sqrt(5)/4, (sqrt(5)-1)/2 and 1/4. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 42 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 43 {
Chris@82 44 INT m;
/*
 * W is first advanced by mb * 6, then by 6 per iteration (six twiddle values
 * per m). MAKE_VOLATILE_STRIDE is an opaque macro defined elsewhere.
 */
Chris@82 45 for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@82 46 E T2, T3, T8, Tc, T5, T6, Tl, T7, TB, TF, T12, TY, To, Ts, Tw;
Chris@82 47 E Tb, Td, Th;
/*
 * Twiddle expansion. Stored pairs: (T2, T5) = W[0..1], (T3, T6) = W[2..3],
 * (T8, Tc) = W[4..5]. Derived pairs, named by the element they are applied to
 * further down: (Tl, To) -> 2, (T7, Tb) -> 4, (Td, Th) -> 5, (TY, T12) -> 6,
 * (Ts, Tw) -> 7, (TB, TF) -> 8.
 */
Chris@82 48 {
Chris@82 49 E TA, TX, TE, T11, Ta, T4;
Chris@82 50 T2 = W[0];
Chris@82 51 T3 = W[2];
Chris@82 52 T4 = T2 * T3;
Chris@82 53 T8 = W[4];
Chris@82 54 TA = T2 * T8;
Chris@82 55 TX = T3 * T8;
Chris@82 56 Tc = W[5];
Chris@82 57 TE = T2 * Tc;
Chris@82 58 T11 = T3 * Tc;
Chris@82 59 T5 = W[1];
Chris@82 60 T6 = W[3];
Chris@82 61 Ta = T2 * T6;
Chris@82 62 Tl = FMA(T5, T6, T4);
Chris@82 63 T7 = FNMS(T5, T6, T4);
Chris@82 64 TB = FMA(T5, Tc, TA);
Chris@82 65 TF = FNMS(T5, T8, TE);
Chris@82 66 T12 = FNMS(T6, T8, T11);
Chris@82 67 TY = FMA(T6, Tc, TX);
Chris@82 68 {
Chris@82 69 E Tr, Tv, T9, Tg;
Chris@82 70 Tr = Tl * T8;
Chris@82 71 Tv = Tl * Tc;
Chris@82 72 To = FNMS(T5, T3, Ta);
Chris@82 73 Ts = FMA(To, Tc, Tr);
Chris@82 74 Tw = FNMS(To, T8, Tv);
Chris@82 75 T9 = T7 * T8;
Chris@82 76 Tg = T7 * Tc;
Chris@82 77 Tb = FMA(T5, T3, Ta);
Chris@82 78 Td = FMA(Tb, Tc, T9);
Chris@82 79 Th = FNMS(Tb, T8, Tg);
Chris@82 80 }
Chris@82 81 }
Chris@82 82 {
Chris@82 83 E Tk, T1c, T24, T2d, TW, T19, T1a, T1P, T1Q, T1Z, T1g, T1h, T1i, T1C, T1H;
Chris@82 84 E T2f, Tz, TM, TN, T1S, T1T, T1Y, T1d, T1e, T1f, T1r, T1w, T2e;
/*
 * Elements 0 and 5. Element 5 is twiddled with (Td, Th): real part
 * Td*re + Th*im, imaginary part Td*im - Th*re. Tk / T2d hold the real /
 * imaginary difference (element 0 minus twiddled element 5); T1c / T24 hold
 * the corresponding sums.
 */
Chris@82 85 {
Chris@82 86 E T1, T23, Te, Tf, Ti, T21, Tj, T22;
Chris@82 87 T1 = ri[0];
Chris@82 88 T23 = ii[0];
Chris@82 89 Te = ri[WS(rs, 5)];
Chris@82 90 Tf = Td * Te;
Chris@82 91 Ti = ii[WS(rs, 5)];
Chris@82 92 T21 = Td * Ti;
Chris@82 93 Tj = FMA(Th, Ti, Tf);
Chris@82 94 Tk = T1 - Tj;
Chris@82 95 T1c = T1 + Tj;
Chris@82 96 T22 = FNMS(Th, Te, T21);
Chris@82 97 T24 = T22 + T23;
Chris@82 98 T2d = T23 - T22;
Chris@82 99 }
/*
 * Elements 4, 1, 9 and 6 are twiddled the same way, then paired as (4,9) and
 * (6,1). Real parts: TW, T19 are the pair differences and T1g, T1h the pair
 * sums. Imaginary parts: T1C, T1H are the differences and T1P, T1Q the sums.
 */
Chris@82 100 {
Chris@82 101 E TR, T1z, T18, T1G, TV, T1B, T14, T1E;
Chris@82 102 {
Chris@82 103 E TO, TP, TQ, T1y;
Chris@82 104 TO = ri[WS(rs, 4)];
Chris@82 105 TP = T7 * TO;
Chris@82 106 TQ = ii[WS(rs, 4)];
Chris@82 107 T1y = T7 * TQ;
Chris@82 108 TR = FMA(Tb, TQ, TP);
Chris@82 109 T1z = FNMS(Tb, TO, T1y);
Chris@82 110 }
Chris@82 111 {
Chris@82 112 E T15, T16, T17, T1F;
Chris@82 113 T15 = ri[WS(rs, 1)];
Chris@82 114 T16 = T2 * T15;
Chris@82 115 T17 = ii[WS(rs, 1)];
Chris@82 116 T1F = T2 * T17;
Chris@82 117 T18 = FMA(T5, T17, T16);
Chris@82 118 T1G = FNMS(T5, T15, T1F);
Chris@82 119 }
Chris@82 120 {
Chris@82 121 E TS, TT, TU, T1A;
Chris@82 122 TS = ri[WS(rs, 9)];
Chris@82 123 TT = T8 * TS;
Chris@82 124 TU = ii[WS(rs, 9)];
Chris@82 125 T1A = T8 * TU;
Chris@82 126 TV = FMA(Tc, TU, TT);
Chris@82 127 T1B = FNMS(Tc, TS, T1A);
Chris@82 128 }
Chris@82 129 {
Chris@82 130 E TZ, T10, T13, T1D;
Chris@82 131 TZ = ri[WS(rs, 6)];
Chris@82 132 T10 = TY * TZ;
Chris@82 133 T13 = ii[WS(rs, 6)];
Chris@82 134 T1D = TY * T13;
Chris@82 135 T14 = FMA(T12, T13, T10);
Chris@82 136 T1E = FNMS(T12, TZ, T1D);
Chris@82 137 }
Chris@82 138 TW = TR - TV;
Chris@82 139 T19 = T14 - T18;
Chris@82 140 T1a = TW + T19;
Chris@82 141 T1P = T1z + T1B;
Chris@82 142 T1Q = T1E + T1G;
Chris@82 143 T1Z = T1P + T1Q;
Chris@82 144 T1g = TR + TV;
Chris@82 145 T1h = T14 + T18;
Chris@82 146 T1i = T1g + T1h;
Chris@82 147 T1C = T1z - T1B;
Chris@82 148 T1H = T1E - T1G;
Chris@82 149 T2f = T1C + T1H;
Chris@82 150 }
/*
 * Elements 2, 3, 7 and 8, paired as (2,7) and (8,3). Real parts: Tz, TM are
 * the pair differences and T1d, T1e the pair sums. Imaginary parts: T1r, T1w
 * are the differences and T1S, T1T the sums.
 */
Chris@82 151 {
Chris@82 152 E Tq, T1o, TL, T1v, Ty, T1q, TH, T1t;
Chris@82 153 {
Chris@82 154 E Tm, Tn, Tp, T1n;
Chris@82 155 Tm = ri[WS(rs, 2)];
Chris@82 156 Tn = Tl * Tm;
Chris@82 157 Tp = ii[WS(rs, 2)];
Chris@82 158 T1n = Tl * Tp;
Chris@82 159 Tq = FMA(To, Tp, Tn);
Chris@82 160 T1o = FNMS(To, Tm, T1n);
Chris@82 161 }
Chris@82 162 {
Chris@82 163 E TI, TJ, TK, T1u;
Chris@82 164 TI = ri[WS(rs, 3)];
Chris@82 165 TJ = T3 * TI;
Chris@82 166 TK = ii[WS(rs, 3)];
Chris@82 167 T1u = T3 * TK;
Chris@82 168 TL = FMA(T6, TK, TJ);
Chris@82 169 T1v = FNMS(T6, TI, T1u);
Chris@82 170 }
Chris@82 171 {
Chris@82 172 E Tt, Tu, Tx, T1p;
Chris@82 173 Tt = ri[WS(rs, 7)];
Chris@82 174 Tu = Ts * Tt;
Chris@82 175 Tx = ii[WS(rs, 7)];
Chris@82 176 T1p = Ts * Tx;
Chris@82 177 Ty = FMA(Tw, Tx, Tu);
Chris@82 178 T1q = FNMS(Tw, Tt, T1p);
Chris@82 179 }
Chris@82 180 {
Chris@82 181 E TC, TD, TG, T1s;
Chris@82 182 TC = ri[WS(rs, 8)];
Chris@82 183 TD = TB * TC;
Chris@82 184 TG = ii[WS(rs, 8)];
Chris@82 185 T1s = TB * TG;
Chris@82 186 TH = FMA(TF, TG, TD);
Chris@82 187 T1t = FNMS(TF, TC, T1s);
Chris@82 188 }
Chris@82 189 Tz = Tq - Ty;
Chris@82 190 TM = TH - TL;
Chris@82 191 TN = Tz + TM;
Chris@82 192 T1S = T1o + T1q;
Chris@82 193 T1T = T1t + T1v;
Chris@82 194 T1Y = T1S + T1T;
Chris@82 195 T1d = Tq + Ty;
Chris@82 196 T1e = TH + TL;
Chris@82 197 T1f = T1d + T1e;
Chris@82 198 T1r = T1o - T1q;
Chris@82 199 T1w = T1t - T1v;
Chris@82 200 T2e = T1r + T1w;
Chris@82 201 }
/*
 * Real parts of outputs 5, 7, 3, 9, 1: built from the real differences
 * (Tk, TN, T1a) plus cross terms from the imaginary differences (T1x, T1I).
 */
Chris@82 202 {
Chris@82 203 E T1l, T1b, T1k, T1J, T1L, T1x, T1I, T1K, T1m;
Chris@82 204 T1l = TN - T1a;
Chris@82 205 T1b = TN + T1a;
Chris@82 206 T1k = FNMS(KP250000000, T1b, Tk);
Chris@82 207 T1x = T1r - T1w;
Chris@82 208 T1I = T1C - T1H;
Chris@82 209 T1J = FMA(KP618033988, T1I, T1x);
Chris@82 210 T1L = FNMS(KP618033988, T1x, T1I);
Chris@82 211 ri[WS(rs, 5)] = Tk + T1b;
Chris@82 212 T1K = FNMS(KP559016994, T1l, T1k);
Chris@82 213 ri[WS(rs, 7)] = FNMS(KP951056516, T1L, T1K);
Chris@82 214 ri[WS(rs, 3)] = FMA(KP951056516, T1L, T1K);
Chris@82 215 T1m = FMA(KP559016994, T1l, T1k);
Chris@82 216 ri[WS(rs, 9)] = FNMS(KP951056516, T1J, T1m);
Chris@82 217 ri[WS(rs, 1)] = FMA(KP951056516, T1J, T1m);
Chris@82 218 }
/*
 * Imaginary parts of outputs 5, 3, 7, 1, 9: built from the imaginary
 * differences (T2d, T2e, T2f) plus cross terms from the real differences
 * (T2k, T2l).
 */
Chris@82 219 {
Chris@82 220 E T2i, T2g, T2h, T2m, T2o, T2k, T2l, T2n, T2j;
Chris@82 221 T2i = T2e - T2f;
Chris@82 222 T2g = T2e + T2f;
Chris@82 223 T2h = FNMS(KP250000000, T2g, T2d);
Chris@82 224 T2k = Tz - TM;
Chris@82 225 T2l = TW - T19;
Chris@82 226 T2m = FMA(KP618033988, T2l, T2k);
Chris@82 227 T2o = FNMS(KP618033988, T2k, T2l);
Chris@82 228 ii[WS(rs, 5)] = T2g + T2d;
Chris@82 229 T2n = FNMS(KP559016994, T2i, T2h);
Chris@82 230 ii[WS(rs, 3)] = FNMS(KP951056516, T2o, T2n);
Chris@82 231 ii[WS(rs, 7)] = FMA(KP951056516, T2o, T2n);
Chris@82 232 T2j = FMA(KP559016994, T2i, T2h);
Chris@82 233 ii[WS(rs, 1)] = FNMS(KP951056516, T2m, T2j);
Chris@82 234 ii[WS(rs, 9)] = FMA(KP951056516, T2m, T2j);
Chris@82 235 }
/*
 * Real parts of outputs 0, 4, 6, 2, 8: built from the real sums
 * (T1c, T1f, T1i) plus cross terms from the imaginary sums (T1R, T1U).
 */
Chris@82 236 {
Chris@82 237 E T1N, T1j, T1M, T1V, T1X, T1R, T1U, T1W, T1O;
Chris@82 238 T1N = T1f - T1i;
Chris@82 239 T1j = T1f + T1i;
Chris@82 240 T1M = FNMS(KP250000000, T1j, T1c);
Chris@82 241 T1R = T1P - T1Q;
Chris@82 242 T1U = T1S - T1T;
Chris@82 243 T1V = FNMS(KP618033988, T1U, T1R);
Chris@82 244 T1X = FMA(KP618033988, T1R, T1U);
Chris@82 245 ri[0] = T1c + T1j;
Chris@82 246 T1W = FMA(KP559016994, T1N, T1M);
Chris@82 247 ri[WS(rs, 4)] = FNMS(KP951056516, T1X, T1W);
Chris@82 248 ri[WS(rs, 6)] = FMA(KP951056516, T1X, T1W);
Chris@82 249 T1O = FNMS(KP559016994, T1N, T1M);
Chris@82 250 ri[WS(rs, 2)] = FNMS(KP951056516, T1V, T1O);
Chris@82 251 ri[WS(rs, 8)] = FMA(KP951056516, T1V, T1O);
Chris@82 252 }
/*
 * Imaginary parts of outputs 0, 4, 6, 2, 8: built from the imaginary sums
 * (T24, T1Y, T1Z) plus cross terms from the real sums (T28, T29).
 */
Chris@82 253 {
Chris@82 254 E T26, T20, T25, T2a, T2c, T28, T29, T2b, T27;
Chris@82 255 T26 = T1Y - T1Z;
Chris@82 256 T20 = T1Y + T1Z;
Chris@82 257 T25 = FNMS(KP250000000, T20, T24);
Chris@82 258 T28 = T1g - T1h;
Chris@82 259 T29 = T1d - T1e;
Chris@82 260 T2a = FNMS(KP618033988, T29, T28);
Chris@82 261 T2c = FMA(KP618033988, T28, T29);
Chris@82 262 ii[0] = T20 + T24;
Chris@82 263 T2b = FMA(KP559016994, T26, T25);
Chris@82 264 ii[WS(rs, 4)] = FMA(KP951056516, T2c, T2b);
Chris@82 265 ii[WS(rs, 6)] = FNMS(KP951056516, T2c, T2b);
Chris@82 266 T27 = FNMS(KP559016994, T26, T25);
Chris@82 267 ii[WS(rs, 2)] = FMA(KP951056516, T2a, T27);
Chris@82 268 ii[WS(rs, 8)] = FNMS(KP951056516, T2a, T27);
Chris@82 269 }
Chris@82 270 }
Chris@82 271 }
Chris@82 272 }
Chris@82 273 }
Chris@82 274
/*
 * Twiddle instruction table for this codelet: three TW_CEXP entries whose last
 * fields are 1, 3 and 9, closed by a TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry supplies one of the three value
 * pairs that t2_10 reads from W[0..5] -- confirm against the tw_instr
 * definition, which is not visible in this file.
 */
Chris@82 275 static const tw_instr twinstr[] = {
Chris@82 276 {TW_CEXP, 0, 1},
Chris@82 277 {TW_CEXP, 0, 3},
Chris@82 278 {TW_CEXP, 0, 9},
Chris@82 279 {TW_NEXT, 1, 0}
Chris@82 280 };
Chris@82 281
/*
 * Codelet descriptor: 10, the name "t2_10", the twinstr table, &GENUS, and the
 * counts {48, 28, 66, 0}, which match the additions / multiplications / fused
 * multiply-adds quoted in the generator comment for the FMA variant. The
 * meaning of the three trailing zero fields is not shown in this file.
 */
Chris@82 282 static const ct_desc desc = { 10, "t2_10", twinstr, &GENUS, {48, 28, 66, 0}, 0, 0, 0 };
Chris@82 283
/*
 * Registration entry point (FMA build): hands the t2_10 kernel and its
 * descriptor to planner p via X(kdft_dit_register). X() is a macro defined
 * outside this file.
 */
Chris@82 284 void X(codelet_t2_10) (planner *p) {
Chris@82 285 X(kdft_dit_register) (p, t2_10, &desc);
Chris@82 286 }
Chris@82 287 #else
Chris@82 288
Chris@82 289 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 10 -name t2_10 -include dft/scalar/t.h */
Chris@82 290
Chris@82 291 /*
Chris@82 292 * This function contains 114 FP additions, 80 FP multiplications,
Chris@82 293 * (or, 76 additions, 42 multiplications, 38 fused multiply/add),
Chris@82 294 * 63 stack variables, 4 constants, and 40 memory accesses
Chris@82 295 */
Chris@82 296 #include "dft/scalar/t.h"
Chris@82 297
/*
 * t2_10 (non-FMA variant): in-place 10-point twiddle codelet, emitted by
 * genfft's gen_twiddle (see the "Generated by" comment above for the exact
 * options). Selected when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters:
 *   ri, ii - real and imaginary data; element k is addressed as
 *            ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..9 and is overwritten
 *            with the result.
 *   W      - twiddle table; W[0]..W[5] are read on every iteration.
 *   rs     - stride handed to WS() to locate element k.
 *   mb, me - the loop runs for m = mb, mb+1, ..., me-1.
 *   ms     - amount added to ri and ii after each iteration.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are macros defined outside
 * this file; the comments below assume they evaluate a*b + c and c - a*b
 * respectively -- confirm in the codelet headers.
 *
 * Per iteration the code (1) expands the three stored twiddle pairs into the
 * pairs needed for elements 2, 4, 5, 6, 7 and 8, (2) applies a twiddle pair to
 * each of elements 1..9, (3) forms sum/difference pairs (0,5), (4,9), (6,1),
 * (2,7), (8,3), and (4) combines the differences into outputs 1, 3, 5, 7, 9
 * and the sums into outputs 0, 2, 4, 6, 8.
 */
Chris@82 298 static void t2_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 299 {
/* Named constants; numerically sin(pi/5), sin(2*pi/5), 1/4 and sqrt(5)/4. */
Chris@82 300 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 301 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 302 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 303 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 304 {
Chris@82 305 INT m;
/*
 * W is first advanced by mb * 6, then by 6 per iteration (six twiddle values
 * per m). MAKE_VOLATILE_STRIDE is an opaque macro defined elsewhere.
 */
Chris@82 306 for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(20, rs)) {
Chris@82 307 E T2, T5, T3, T6, T8, Tm, Tc, Tk, T9, Td, Te, TM, TO, Tg, Tp;
Chris@82 308 E Tv, Tx, Tr;
/*
 * Twiddle expansion. Stored pairs: (T2, T5) = W[0..1], (T3, T6) = W[2..3],
 * (T9, Td) = W[4..5]. Derived pairs, named by the element they are applied to
 * further down: (Tk, Tm) -> 2, (T8, Tc) -> 4, (Te, Tg) -> 5, (TM, TO) -> 6,
 * (Tp, Tr) -> 7, (Tv, Tx) -> 8.
 */
Chris@82 309 {
Chris@82 310 E T4, Tb, T7, Ta;
Chris@82 311 T2 = W[0];
Chris@82 312 T5 = W[1];
Chris@82 313 T3 = W[2];
Chris@82 314 T6 = W[3];
Chris@82 315 T4 = T2 * T3;
Chris@82 316 Tb = T5 * T3;
Chris@82 317 T7 = T5 * T6;
Chris@82 318 Ta = T2 * T6;
Chris@82 319 T8 = T4 - T7;
Chris@82 320 Tm = Ta - Tb;
Chris@82 321 Tc = Ta + Tb;
Chris@82 322 Tk = T4 + T7;
Chris@82 323 T9 = W[4];
Chris@82 324 Td = W[5];
Chris@82 325 Te = FMA(T8, T9, Tc * Td);
Chris@82 326 TM = FMA(T3, T9, T6 * Td);
Chris@82 327 TO = FNMS(T6, T9, T3 * Td);
Chris@82 328 Tg = FNMS(Tc, T9, T8 * Td);
Chris@82 329 Tp = FMA(Tk, T9, Tm * Td);
Chris@82 330 Tv = FMA(T2, T9, T5 * Td);
Chris@82 331 Tx = FNMS(T5, T9, T2 * Td);
Chris@82 332 Tr = FNMS(Tm, T9, Tk * Td);
Chris@82 333 }
Chris@82 334 {
Chris@82 335 E Tj, T1S, TX, T1G, TL, TU, TV, T1s, T1t, T1C, T11, T12, T13, T1h, T1k;
Chris@82 336 E T1Q, Tu, TD, TE, T1v, T1w, T1B, TY, TZ, T10, T1a, T1d, T1P;
/*
 * Elements 0 and 5. Element 5 is twiddled with (Te, Tg): real part
 * Te*re + Tg*im, imaginary part Te*im - Tg*re. Tj / T1S hold the real /
 * imaginary difference (element 0 minus twiddled element 5); TX / T1G hold
 * the corresponding sums.
 */
Chris@82 337 {
Chris@82 338 E T1, T1F, Ti, T1E, Tf, Th;
Chris@82 339 T1 = ri[0];
Chris@82 340 T1F = ii[0];
Chris@82 341 Tf = ri[WS(rs, 5)];
Chris@82 342 Th = ii[WS(rs, 5)];
Chris@82 343 Ti = FMA(Te, Tf, Tg * Th);
Chris@82 344 T1E = FNMS(Tg, Tf, Te * Th);
Chris@82 345 Tj = T1 - Ti;
Chris@82 346 T1S = T1F - T1E;
Chris@82 347 TX = T1 + Ti;
Chris@82 348 T1G = T1E + T1F;
Chris@82 349 }
/*
 * Elements 4, 1, 9 and 6 are twiddled the same way, then paired as (4,9) and
 * (6,1). Real parts: TL, TU are the pair differences and T11, T12 the pair
 * sums. Imaginary parts: T1h, T1k are the differences and T1s, T1t the sums.
 */
Chris@82 350 {
Chris@82 351 E TH, T1f, TT, T1j, TK, T1g, TQ, T1i;
Chris@82 352 {
Chris@82 353 E TF, TG, TR, TS;
Chris@82 354 TF = ri[WS(rs, 4)];
Chris@82 355 TG = ii[WS(rs, 4)];
Chris@82 356 TH = FMA(T8, TF, Tc * TG);
Chris@82 357 T1f = FNMS(Tc, TF, T8 * TG);
Chris@82 358 TR = ri[WS(rs, 1)];
Chris@82 359 TS = ii[WS(rs, 1)];
Chris@82 360 TT = FMA(T2, TR, T5 * TS);
Chris@82 361 T1j = FNMS(T5, TR, T2 * TS);
Chris@82 362 }
Chris@82 363 {
Chris@82 364 E TI, TJ, TN, TP;
Chris@82 365 TI = ri[WS(rs, 9)];
Chris@82 366 TJ = ii[WS(rs, 9)];
Chris@82 367 TK = FMA(T9, TI, Td * TJ);
Chris@82 368 T1g = FNMS(Td, TI, T9 * TJ);
Chris@82 369 TN = ri[WS(rs, 6)];
Chris@82 370 TP = ii[WS(rs, 6)];
Chris@82 371 TQ = FMA(TM, TN, TO * TP);
Chris@82 372 T1i = FNMS(TO, TN, TM * TP);
Chris@82 373 }
Chris@82 374 TL = TH - TK;
Chris@82 375 TU = TQ - TT;
Chris@82 376 TV = TL + TU;
Chris@82 377 T1s = T1f + T1g;
Chris@82 378 T1t = T1i + T1j;
Chris@82 379 T1C = T1s + T1t;
Chris@82 380 T11 = TH + TK;
Chris@82 381 T12 = TQ + TT;
Chris@82 382 T13 = T11 + T12;
Chris@82 383 T1h = T1f - T1g;
Chris@82 384 T1k = T1i - T1j;
Chris@82 385 T1Q = T1h + T1k;
Chris@82 386 }
/*
 * Elements 2, 3, 7 and 8, paired as (2,7) and (8,3). Real parts: Tu, TD are
 * the pair differences and TY, TZ the pair sums. Imaginary parts: T1a, T1d
 * are the differences and T1v, T1w the sums.
 */
Chris@82 387 {
Chris@82 388 E To, T18, TC, T1c, Tt, T19, Tz, T1b;
Chris@82 389 {
Chris@82 390 E Tl, Tn, TA, TB;
Chris@82 391 Tl = ri[WS(rs, 2)];
Chris@82 392 Tn = ii[WS(rs, 2)];
Chris@82 393 To = FMA(Tk, Tl, Tm * Tn);
Chris@82 394 T18 = FNMS(Tm, Tl, Tk * Tn);
Chris@82 395 TA = ri[WS(rs, 3)];
Chris@82 396 TB = ii[WS(rs, 3)];
Chris@82 397 TC = FMA(T3, TA, T6 * TB);
Chris@82 398 T1c = FNMS(T6, TA, T3 * TB);
Chris@82 399 }
Chris@82 400 {
Chris@82 401 E Tq, Ts, Tw, Ty;
Chris@82 402 Tq = ri[WS(rs, 7)];
Chris@82 403 Ts = ii[WS(rs, 7)];
Chris@82 404 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 405 T19 = FNMS(Tr, Tq, Tp * Ts);
Chris@82 406 Tw = ri[WS(rs, 8)];
Chris@82 407 Ty = ii[WS(rs, 8)];
Chris@82 408 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 409 T1b = FNMS(Tx, Tw, Tv * Ty);
Chris@82 410 }
Chris@82 411 Tu = To - Tt;
Chris@82 412 TD = Tz - TC;
Chris@82 413 TE = Tu + TD;
Chris@82 414 T1v = T18 + T19;
Chris@82 415 T1w = T1b + T1c;
Chris@82 416 T1B = T1v + T1w;
Chris@82 417 TY = To + Tt;
Chris@82 418 TZ = Tz + TC;
Chris@82 419 T10 = TY + TZ;
Chris@82 420 T1a = T18 - T19;
Chris@82 421 T1d = T1b - T1c;
Chris@82 422 T1P = T1a + T1d;
Chris@82 423 }
/*
 * Real parts of outputs 5, 7, 3, 9, 1: built from the real differences
 * (Tj, TE, TV) plus cross terms from the imaginary differences (T1e, T1l).
 */
Chris@82 424 {
Chris@82 425 E T15, TW, T16, T1m, T1o, T1e, T1l, T1n, T17;
Chris@82 426 T15 = KP559016994 * (TE - TV);
Chris@82 427 TW = TE + TV;
Chris@82 428 T16 = FNMS(KP250000000, TW, Tj);
Chris@82 429 T1e = T1a - T1d;
Chris@82 430 T1l = T1h - T1k;
Chris@82 431 T1m = FMA(KP951056516, T1e, KP587785252 * T1l);
Chris@82 432 T1o = FNMS(KP587785252, T1e, KP951056516 * T1l);
Chris@82 433 ri[WS(rs, 5)] = Tj + TW;
Chris@82 434 T1n = T16 - T15;
Chris@82 435 ri[WS(rs, 7)] = T1n - T1o;
Chris@82 436 ri[WS(rs, 3)] = T1n + T1o;
Chris@82 437 T17 = T15 + T16;
Chris@82 438 ri[WS(rs, 9)] = T17 - T1m;
Chris@82 439 ri[WS(rs, 1)] = T17 + T1m;
Chris@82 440 }
/*
 * Imaginary parts of outputs 5, 3, 7, 1, 9: built from the imaginary
 * differences (T1S, T1P, T1Q) plus cross terms from the real differences
 * (T1W, T1X).
 */
Chris@82 441 {
Chris@82 442 E T1R, T1T, T1U, T1Y, T20, T1W, T1X, T1Z, T1V;
Chris@82 443 T1R = KP559016994 * (T1P - T1Q);
Chris@82 444 T1T = T1P + T1Q;
Chris@82 445 T1U = FNMS(KP250000000, T1T, T1S);
Chris@82 446 T1W = Tu - TD;
Chris@82 447 T1X = TL - TU;
Chris@82 448 T1Y = FMA(KP951056516, T1W, KP587785252 * T1X);
Chris@82 449 T20 = FNMS(KP587785252, T1W, KP951056516 * T1X);
Chris@82 450 ii[WS(rs, 5)] = T1T + T1S;
Chris@82 451 T1Z = T1U - T1R;
Chris@82 452 ii[WS(rs, 3)] = T1Z - T20;
Chris@82 453 ii[WS(rs, 7)] = T20 + T1Z;
Chris@82 454 T1V = T1R + T1U;
Chris@82 455 ii[WS(rs, 1)] = T1V - T1Y;
Chris@82 456 ii[WS(rs, 9)] = T1Y + T1V;
Chris@82 457 }
/*
 * Real parts of outputs 0, 4, 6, 2, 8: built from the real sums
 * (TX, T10, T13) plus cross terms from the imaginary sums (T1u, T1x).
 */
Chris@82 458 {
Chris@82 459 E T1q, T14, T1p, T1y, T1A, T1u, T1x, T1z, T1r;
Chris@82 460 T1q = KP559016994 * (T10 - T13);
Chris@82 461 T14 = T10 + T13;
Chris@82 462 T1p = FNMS(KP250000000, T14, TX);
Chris@82 463 T1u = T1s - T1t;
Chris@82 464 T1x = T1v - T1w;
Chris@82 465 T1y = FNMS(KP587785252, T1x, KP951056516 * T1u);
Chris@82 466 T1A = FMA(KP951056516, T1x, KP587785252 * T1u);
Chris@82 467 ri[0] = TX + T14;
Chris@82 468 T1z = T1q + T1p;
Chris@82 469 ri[WS(rs, 4)] = T1z - T1A;
Chris@82 470 ri[WS(rs, 6)] = T1z + T1A;
Chris@82 471 T1r = T1p - T1q;
Chris@82 472 ri[WS(rs, 2)] = T1r - T1y;
Chris@82 473 ri[WS(rs, 8)] = T1r + T1y;
Chris@82 474 }
/*
 * Imaginary parts of outputs 0, 4, 6, 2, 8: built from the imaginary sums
 * (T1G, T1B, T1C) plus cross terms from the real sums (T1H, T1I).
 */
Chris@82 475 {
Chris@82 476 E T1L, T1D, T1K, T1J, T1N, T1H, T1I, T1O, T1M;
Chris@82 477 T1L = KP559016994 * (T1B - T1C);
Chris@82 478 T1D = T1B + T1C;
Chris@82 479 T1K = FNMS(KP250000000, T1D, T1G);
Chris@82 480 T1H = T11 - T12;
Chris@82 481 T1I = TY - TZ;
Chris@82 482 T1J = FNMS(KP587785252, T1I, KP951056516 * T1H);
Chris@82 483 T1N = FMA(KP951056516, T1I, KP587785252 * T1H);
Chris@82 484 ii[0] = T1D + T1G;
Chris@82 485 T1O = T1L + T1K;
Chris@82 486 ii[WS(rs, 4)] = T1N + T1O;
Chris@82 487 ii[WS(rs, 6)] = T1O - T1N;
Chris@82 488 T1M = T1K - T1L;
Chris@82 489 ii[WS(rs, 2)] = T1J + T1M;
Chris@82 490 ii[WS(rs, 8)] = T1M - T1J;
Chris@82 491 }
Chris@82 492 }
Chris@82 493 }
Chris@82 494 }
Chris@82 495 }
Chris@82 496
/*
 * Twiddle instruction table for this codelet: three TW_CEXP entries whose last
 * fields are 1, 3 and 9, closed by a TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry supplies one of the three value
 * pairs that t2_10 reads from W[0..5] -- confirm against the tw_instr
 * definition, which is not visible in this file.
 */
Chris@82 497 static const tw_instr twinstr[] = {
Chris@82 498 {TW_CEXP, 0, 1},
Chris@82 499 {TW_CEXP, 0, 3},
Chris@82 500 {TW_CEXP, 0, 9},
Chris@82 501 {TW_NEXT, 1, 0}
Chris@82 502 };
Chris@82 503
/*
 * Codelet descriptor: 10, the name "t2_10", the twinstr table, &GENUS, and the
 * counts {76, 42, 38, 0}, which match the additions / multiplications / fused
 * multiply-adds quoted in the generator comment for the non-FMA variant. The
 * meaning of the three trailing zero fields is not shown in this file.
 */
Chris@82 504 static const ct_desc desc = { 10, "t2_10", twinstr, &GENUS, {76, 42, 38, 0}, 0, 0, 0 };
Chris@82 505
/*
 * Registration entry point (non-FMA build): hands the t2_10 kernel and its
 * descriptor to planner p via X(kdft_dit_register). X() is a macro defined
 * outside this file.
 */
Chris@82 506 void X(codelet_t2_10) (planner *p) {
Chris@82 507 X(kdft_dit_register) (p, t2_10, &desc);
Chris@82 508 }
Chris@82 509 #endif