annotate src/fftw-3.3.8/dft/scalar/codelets/t1_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:14 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 12 -name t1_12 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 118 FP additions, 68 FP multiplications,
Chris@82 32 * (or, 72 additions, 22 multiplications, 46 fused multiply/add),
Chris@82 33 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t1_12 -- FMA-oriented variant (this branch is compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For every m in [mb, me) the loop body:
 *   1. reads 12 value pairs: ri[0]/ii[0] and ri/ii at WS(rs, k), k = 1..11;
 *   2. combines pair k (k >= 1) with the twiddle pair W[2k-2], W[2k-1];
 *   3. forms sums and differences of those results, some of them scaled by
 *      the constants KP500000000 and KP866025403;
 *   4. stores 12 value pairs back into the same ri/ii slots (in place).
 * W is first offset by mb * 22; between iterations ri and ii advance by ms
 * and W advances by 22.
 *
 * Parameters:
 *   ri, ii - arrays that are read and then overwritten; presumably the real
 *            and imaginary parts of the data (TODO confirm against callers)
 *   W      - twiddle table, only read; 22 entries are used per iteration
 *   rs     - stride passed to WS() to locate element k
 *   mb, me - first and one-past-last value of the loop index m
 *   ms     - increment applied to ri and ii after each iteration
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from headers that are not visible here.
 * FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c) presumably c - a*b --
 * confirm against the project headers. The file is marked as automatically
 * generated, so changes belong in the generator invocation, not here.
 */
Chris@82 37 static void t1_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@82 41 {
Chris@82 42 INT m;
Chris@82 43 for (m = mb, W = W + (mb * 22); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { /* one group of 12 ri/ii pairs per iteration */
Chris@82 44 E T1, T2i, Tl, T2e, T10, T1Y, TG, T1S, Ty, T2r, T1s, T2f, T1d, T21, T1H;
Chris@82 45 E T1Z, Te, T2o, T1l, T2h, TT, T1V, T1A, T1T;
Chris@82 46 T1 = ri[0]; /* element 0 is used as-is, no twiddle applied */
Chris@82 47 T2i = ii[0];
Chris@82 48 { /* element 6 combined with twiddle pair W[10], W[11] -> Tl, T2e */
Chris@82 49 E Th, Tk, Ti, T2d, Tg, Tj;
Chris@82 50 Th = ri[WS(rs, 6)];
Chris@82 51 Tk = ii[WS(rs, 6)];
Chris@82 52 Tg = W[10];
Chris@82 53 Ti = Tg * Th;
Chris@82 54 T2d = Tg * Tk;
Chris@82 55 Tj = W[11];
Chris@82 56 Tl = FMA(Tj, Tk, Ti);
Chris@82 57 T2e = FNMS(Tj, Th, T2d);
Chris@82 58 }
Chris@82 59 { /* element 9 combined with twiddle pair W[16], W[17] -> T10, T1Y */
Chris@82 60 E TW, TZ, TX, T1X, TV, TY;
Chris@82 61 TW = ri[WS(rs, 9)];
Chris@82 62 TZ = ii[WS(rs, 9)];
Chris@82 63 TV = W[16];
Chris@82 64 TX = TV * TW;
Chris@82 65 T1X = TV * TZ;
Chris@82 66 TY = W[17];
Chris@82 67 T10 = FMA(TY, TZ, TX);
Chris@82 68 T1Y = FNMS(TY, TW, T1X);
Chris@82 69 }
Chris@82 70 { /* element 3 combined with twiddle pair W[4], W[5] -> TG, T1S */
Chris@82 71 E TC, TF, TD, T1R, TB, TE;
Chris@82 72 TC = ri[WS(rs, 3)];
Chris@82 73 TF = ii[WS(rs, 3)];
Chris@82 74 TB = W[4];
Chris@82 75 TD = TB * TC;
Chris@82 76 T1R = TB * TF;
Chris@82 77 TE = W[5];
Chris@82 78 TG = FMA(TE, TF, TD);
Chris@82 79 T1S = FNMS(TE, TC, T1R);
Chris@82 80 }
Chris@82 81 { /* elements 10 (W[18], W[19]) and 2 (W[2], W[3]); their sums/differences -> Ty, T2r, T1s, T2f */
Chris@82 82 E Tn, Tq, To, T1o, Tt, Tw, Tu, T1q, Tm, Ts;
Chris@82 83 Tn = ri[WS(rs, 10)];
Chris@82 84 Tq = ii[WS(rs, 10)];
Chris@82 85 Tm = W[18];
Chris@82 86 To = Tm * Tn;
Chris@82 87 T1o = Tm * Tq;
Chris@82 88 Tt = ri[WS(rs, 2)];
Chris@82 89 Tw = ii[WS(rs, 2)];
Chris@82 90 Ts = W[2];
Chris@82 91 Tu = Ts * Tt;
Chris@82 92 T1q = Ts * Tw;
Chris@82 93 {
Chris@82 94 E Tr, T1p, Tx, T1r, Tp, Tv;
Chris@82 95 Tp = W[19];
Chris@82 96 Tr = FMA(Tp, Tq, To);
Chris@82 97 T1p = FNMS(Tp, Tn, T1o);
Chris@82 98 Tv = W[3];
Chris@82 99 Tx = FMA(Tv, Tw, Tu);
Chris@82 100 T1r = FNMS(Tv, Tt, T1q);
Chris@82 101 Ty = Tr + Tx;
Chris@82 102 T2r = Tx - Tr;
Chris@82 103 T1s = T1p - T1r;
Chris@82 104 T2f = T1p + T1r;
Chris@82 105 }
Chris@82 106 }
Chris@82 107 { /* elements 1 (W[0], W[1]) and 5 (W[8], W[9]); their sums/differences -> T1d, T21, T1H, T1Z */
Chris@82 108 E T12, T15, T13, T1D, T18, T1b, T19, T1F, T11, T17;
Chris@82 109 T12 = ri[WS(rs, 1)];
Chris@82 110 T15 = ii[WS(rs, 1)];
Chris@82 111 T11 = W[0];
Chris@82 112 T13 = T11 * T12;
Chris@82 113 T1D = T11 * T15;
Chris@82 114 T18 = ri[WS(rs, 5)];
Chris@82 115 T1b = ii[WS(rs, 5)];
Chris@82 116 T17 = W[8];
Chris@82 117 T19 = T17 * T18;
Chris@82 118 T1F = T17 * T1b;
Chris@82 119 {
Chris@82 120 E T16, T1E, T1c, T1G, T14, T1a;
Chris@82 121 T14 = W[1];
Chris@82 122 T16 = FMA(T14, T15, T13);
Chris@82 123 T1E = FNMS(T14, T12, T1D);
Chris@82 124 T1a = W[9];
Chris@82 125 T1c = FMA(T1a, T1b, T19);
Chris@82 126 T1G = FNMS(T1a, T18, T1F);
Chris@82 127 T1d = T16 + T1c;
Chris@82 128 T21 = T1c - T16;
Chris@82 129 T1H = T1E - T1G;
Chris@82 130 T1Z = T1E + T1G;
Chris@82 131 }
Chris@82 132 }
Chris@82 133 { /* elements 4 (W[6], W[7]) and 8 (W[14], W[15]); their sums/differences -> Te, T2o, T1l, T2h */
Chris@82 134 E T3, T6, T4, T1h, T9, Tc, Ta, T1j, T2, T8;
Chris@82 135 T3 = ri[WS(rs, 4)];
Chris@82 136 T6 = ii[WS(rs, 4)];
Chris@82 137 T2 = W[6];
Chris@82 138 T4 = T2 * T3;
Chris@82 139 T1h = T2 * T6;
Chris@82 140 T9 = ri[WS(rs, 8)];
Chris@82 141 Tc = ii[WS(rs, 8)];
Chris@82 142 T8 = W[14];
Chris@82 143 Ta = T8 * T9;
Chris@82 144 T1j = T8 * Tc;
Chris@82 145 {
Chris@82 146 E T7, T1i, Td, T1k, T5, Tb;
Chris@82 147 T5 = W[7];
Chris@82 148 T7 = FMA(T5, T6, T4);
Chris@82 149 T1i = FNMS(T5, T3, T1h);
Chris@82 150 Tb = W[15];
Chris@82 151 Td = FMA(Tb, Tc, Ta);
Chris@82 152 T1k = FNMS(Tb, T9, T1j);
Chris@82 153 Te = T7 + Td;
Chris@82 154 T2o = Td - T7;
Chris@82 155 T1l = T1i - T1k;
Chris@82 156 T2h = T1i + T1k;
Chris@82 157 }
Chris@82 158 }
Chris@82 159 { /* elements 7 (W[12], W[13]) and 11 (W[20], W[21]); their sums/differences -> TT, T1V, T1A, T1T */
Chris@82 160 E TI, TL, TJ, T1w, TO, TR, TP, T1y, TH, TN;
Chris@82 161 TI = ri[WS(rs, 7)];
Chris@82 162 TL = ii[WS(rs, 7)];
Chris@82 163 TH = W[12];
Chris@82 164 TJ = TH * TI;
Chris@82 165 T1w = TH * TL;
Chris@82 166 TO = ri[WS(rs, 11)];
Chris@82 167 TR = ii[WS(rs, 11)];
Chris@82 168 TN = W[20];
Chris@82 169 TP = TN * TO;
Chris@82 170 T1y = TN * TR;
Chris@82 171 {
Chris@82 172 E TM, T1x, TS, T1z, TK, TQ;
Chris@82 173 TK = W[13];
Chris@82 174 TM = FMA(TK, TL, TJ);
Chris@82 175 T1x = FNMS(TK, TI, T1w);
Chris@82 176 TQ = W[21];
Chris@82 177 TS = FMA(TQ, TR, TP);
Chris@82 178 T1z = FNMS(TQ, TO, T1y);
Chris@82 179 TT = TM + TS;
Chris@82 180 T1V = TS - TM;
Chris@82 181 T1A = T1x - T1z;
Chris@82 182 T1T = T1x + T1z;
Chris@82 183 }
Chris@82 184 }
Chris@82 185 { /* outputs 0, 3, 6 and 9: built from plain sums and differences of the values above */
Chris@82 186 E TA, T28, T2k, T2m, T1f, T2l, T2b, T2c;
Chris@82 187 {
Chris@82 188 E Tf, Tz, T2g, T2j;
Chris@82 189 Tf = T1 + Te;
Chris@82 190 Tz = Tl + Ty;
Chris@82 191 TA = Tf + Tz;
Chris@82 192 T28 = Tf - Tz;
Chris@82 193 T2g = T2e + T2f;
Chris@82 194 T2j = T2h + T2i;
Chris@82 195 T2k = T2g + T2j;
Chris@82 196 T2m = T2j - T2g;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E TU, T1e, T29, T2a;
Chris@82 200 TU = TG + TT;
Chris@82 201 T1e = T10 + T1d;
Chris@82 202 T1f = TU + T1e;
Chris@82 203 T2l = TU - T1e;
Chris@82 204 T29 = T1S + T1T;
Chris@82 205 T2a = T1Y + T1Z;
Chris@82 206 T2b = T29 - T2a;
Chris@82 207 T2c = T29 + T2a;
Chris@82 208 }
Chris@82 209 ri[WS(rs, 6)] = TA - T1f;
Chris@82 210 ii[WS(rs, 6)] = T2k - T2c;
Chris@82 211 ri[0] = TA + T1f;
Chris@82 212 ii[0] = T2c + T2k;
Chris@82 213 ri[WS(rs, 3)] = T28 - T2b;
Chris@82 214 ii[WS(rs, 3)] = T2l + T2m;
Chris@82 215 ri[WS(rs, 9)] = T28 + T2b;
Chris@82 216 ii[WS(rs, 9)] = T2m - T2l;
Chris@82 217 }
Chris@82 218 { /* outputs 1, 2, 4, 5, 7, 8, 10 and 11: use the KP500000000 / KP866025403 scaled terms */
Chris@82 219 E T1m, T1K, T2p, T2y, T2s, T2x, T1t, T1L, T1B, T1N, T1W, T25, T22, T26, T1I;
Chris@82 220 E T1O;
Chris@82 221 { /* scaled combinations derived from elements 0, 4, 8 and 6, 10, 2 */
Chris@82 222 E T1g, T2n, T2q, T1n;
Chris@82 223 T1g = FNMS(KP500000000, Te, T1);
Chris@82 224 T1m = FNMS(KP866025403, T1l, T1g);
Chris@82 225 T1K = FMA(KP866025403, T1l, T1g);
Chris@82 226 T2n = FNMS(KP500000000, T2h, T2i);
Chris@82 227 T2p = FMA(KP866025403, T2o, T2n);
Chris@82 228 T2y = FNMS(KP866025403, T2o, T2n);
Chris@82 229 T2q = FNMS(KP500000000, T2f, T2e);
Chris@82 230 T2s = FMA(KP866025403, T2r, T2q);
Chris@82 231 T2x = FNMS(KP866025403, T2r, T2q);
Chris@82 232 T1n = FNMS(KP500000000, Ty, Tl);
Chris@82 233 T1t = FNMS(KP866025403, T1s, T1n);
Chris@82 234 T1L = FMA(KP866025403, T1s, T1n);
Chris@82 235 }
Chris@82 236 { /* scaled combinations derived from elements 3, 7, 11 and 9, 1, 5 */
Chris@82 237 E T1v, T1U, T20, T1C;
Chris@82 238 T1v = FNMS(KP500000000, TT, TG);
Chris@82 239 T1B = FNMS(KP866025403, T1A, T1v);
Chris@82 240 T1N = FMA(KP866025403, T1A, T1v);
Chris@82 241 T1U = FNMS(KP500000000, T1T, T1S);
Chris@82 242 T1W = FMA(KP866025403, T1V, T1U);
Chris@82 243 T25 = FNMS(KP866025403, T1V, T1U);
Chris@82 244 T20 = FNMS(KP500000000, T1Z, T1Y);
Chris@82 245 T22 = FMA(KP866025403, T21, T20);
Chris@82 246 T26 = FNMS(KP866025403, T21, T20);
Chris@82 247 T1C = FNMS(KP500000000, T1d, T10);
Chris@82 248 T1I = FNMS(KP866025403, T1H, T1C);
Chris@82 249 T1O = FMA(KP866025403, T1H, T1C);
Chris@82 250 }
Chris@82 251 { /* outputs 2 and 8 */
Chris@82 252 E T1u, T1J, T2z, T2A;
Chris@82 253 T1u = T1m + T1t;
Chris@82 254 T1J = T1B + T1I;
Chris@82 255 ri[WS(rs, 2)] = T1u - T1J;
Chris@82 256 ri[WS(rs, 8)] = T1u + T1J;
Chris@82 257 T2z = T2x + T2y;
Chris@82 258 T2A = T25 + T26;
Chris@82 259 ii[WS(rs, 2)] = T2z - T2A;
Chris@82 260 ii[WS(rs, 8)] = T2A + T2z;
Chris@82 261 }
Chris@82 262 { /* outputs 4 and 10 */
Chris@82 263 E T1M, T1P, T2v, T2w;
Chris@82 264 T1M = T1K + T1L;
Chris@82 265 T1P = T1N + T1O;
Chris@82 266 ri[WS(rs, 10)] = T1M - T1P;
Chris@82 267 ri[WS(rs, 4)] = T1M + T1P;
Chris@82 268 T2v = T1W + T22;
Chris@82 269 T2w = T2s + T2p;
Chris@82 270 ii[WS(rs, 4)] = T2v + T2w;
Chris@82 271 ii[WS(rs, 10)] = T2w - T2v;
Chris@82 272 }
Chris@82 273 { /* outputs 1 and 7 */
Chris@82 274 E T1Q, T23, T2t, T2u;
Chris@82 275 T1Q = T1K - T1L;
Chris@82 276 T23 = T1W - T22;
Chris@82 277 ri[WS(rs, 7)] = T1Q - T23;
Chris@82 278 ri[WS(rs, 1)] = T1Q + T23;
Chris@82 279 T2t = T2p - T2s;
Chris@82 280 T2u = T1N - T1O;
Chris@82 281 ii[WS(rs, 1)] = T2t - T2u;
Chris@82 282 ii[WS(rs, 7)] = T2u + T2t;
Chris@82 283 }
Chris@82 284 { /* outputs 5 and 11 */
Chris@82 285 E T24, T27, T2B, T2C;
Chris@82 286 T24 = T1m - T1t;
Chris@82 287 T27 = T25 - T26;
Chris@82 288 ri[WS(rs, 11)] = T24 - T27;
Chris@82 289 ri[WS(rs, 5)] = T24 + T27;
Chris@82 290 T2B = T2y - T2x;
Chris@82 291 T2C = T1B - T1I;
Chris@82 292 ii[WS(rs, 5)] = T2B - T2C;
Chris@82 293 ii[WS(rs, 11)] = T2C + T2B;
Chris@82 294 }
Chris@82 295 }
Chris@82 296 }
Chris@82 297 }
Chris@82 298 }
Chris@82 299
/*
 * Twiddle instruction table referenced by the ct_desc below.
 * NOTE(review): TW_FULL, TW_NEXT and tw_instr are defined in headers not
 * visible here. Presumably the first entry requests the full twiddle set
 * for size 12 and the second terminates the list -- confirm against the
 * project headers.
 */
Chris@82 300 static const tw_instr twinstr[] = {
Chris@82 301 {TW_FULL, 0, 12},
Chris@82 302 {TW_NEXT, 1, 0}
Chris@82 303 };
Chris@82 304
/*
 * Descriptor handed to the registration call for the FMA variant.
 * 12 and "t1_12" match the generator's -n and -name options, and
 * {72, 22, 46, 0} matches the add / multiply / fused-multiply-add counts
 * quoted in the generator statistics comment for this branch.
 * NOTE(review): the meaning of GENUS, the fourth count and the three
 * trailing zeros is defined by ct_desc in headers not visible here.
 */
Chris@82 305 static const ct_desc desc = { 12, "t1_12", twinstr, &GENUS, {72, 22, 46, 0}, 0, 0, 0 };
Chris@82 306
/*
 * Registration entry point: passes the t1_12 kernel and its descriptor to
 * kdft_dit_register for planner p.
 * NOTE(review): X(...) is a macro from project headers, presumably a
 * name-prefixing wrapper -- confirm.
 */
Chris@82 307 void X(codelet_t1_12) (planner *p) {
Chris@82 308 X(kdft_dit_register) (p, t1_12, &desc);
Chris@82 309 }
Chris@82 310 #else
Chris@82 311
Chris@82 312 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 12 -name t1_12 -include dft/scalar/t.h */
Chris@82 313
Chris@82 314 /*
Chris@82 315 * This function contains 118 FP additions, 60 FP multiplications,
Chris@82 316 * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 317 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 318 */
Chris@82 319 #include "dft/scalar/t.h"
Chris@82 320
/*
 * t1_12 -- non-FMA variant (this is the #else branch, compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For every m in [mb, me) the loop body:
 *   1. reads 12 value pairs: ri[0]/ii[0] and ri/ii at WS(rs, k), k = 1..11;
 *   2. combines pair k (k >= 1) with the twiddle pair W[2k-2], W[2k-1];
 *   3. groups the results three at a time (0/4/8, 9/1/5, 6/10/2, 3/7/11) and
 *      forms sums plus terms scaled by KP500000000 and KP866025403;
 *   4. stores 12 value pairs back into the same ri/ii slots (in place).
 * W is first offset by mb * 22; between iterations ri and ii advance by ms
 * and W advances by 22.
 *
 * Parameters:
 *   ri, ii - arrays that are read and then overwritten; presumably the real
 *            and imaginary parts of the data (TODO confirm against callers)
 *   W      - twiddle table, only read; 22 entries are used per iteration
 *   rs     - stride passed to WS() to locate element k
 *   mb, me - first and one-past-last value of the loop index m
 *   ms     - increment applied to ri and ii after each iteration
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from headers that are not visible here.
 * FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c) presumably c - a*b --
 * confirm against the project headers. The file is marked as automatically
 * generated, so changes belong in the generator invocation, not here.
 */
Chris@82 321 static void t1_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 322 {
Chris@82 323 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@82 324 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 325 {
Chris@82 326 INT m;
Chris@82 327 for (m = mb, W = W + (mb * 22); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 22, MAKE_VOLATILE_STRIDE(24, rs)) { /* one group of 12 ri/ii pairs per iteration */
Chris@82 328 E T1, T1W, T18, T21, Tc, T15, T1V, T22, TR, T1E, T1o, T1D, T12, T1l, T1F;
Chris@82 329 E T1G, Ti, T1S, T1d, T24, Tt, T1a, T1T, T25, TA, T1z, T1j, T1y, TL, T1g;
Chris@82 330 E T1A, T1B;
Chris@82 331 { /* group of elements 0, 4, 8 -> T18, T21, Tc, T15, T1V, T22 */
Chris@82 332 E T6, T16, Tb, T17;
Chris@82 333 T1 = ri[0]; /* element 0 is used as-is, no twiddle applied */
Chris@82 334 T1W = ii[0];
Chris@82 335 { /* element 4 combined with twiddle pair W[6], W[7] */
Chris@82 336 E T3, T5, T2, T4;
Chris@82 337 T3 = ri[WS(rs, 4)];
Chris@82 338 T5 = ii[WS(rs, 4)];
Chris@82 339 T2 = W[6];
Chris@82 340 T4 = W[7];
Chris@82 341 T6 = FMA(T2, T3, T4 * T5);
Chris@82 342 T16 = FNMS(T4, T3, T2 * T5);
Chris@82 343 }
Chris@82 344 { /* element 8 combined with twiddle pair W[14], W[15] */
Chris@82 345 E T8, Ta, T7, T9;
Chris@82 346 T8 = ri[WS(rs, 8)];
Chris@82 347 Ta = ii[WS(rs, 8)];
Chris@82 348 T7 = W[14];
Chris@82 349 T9 = W[15];
Chris@82 350 Tb = FMA(T7, T8, T9 * Ta);
Chris@82 351 T17 = FNMS(T9, T8, T7 * Ta);
Chris@82 352 }
Chris@82 353 T18 = KP866025403 * (T16 - T17);
Chris@82 354 T21 = KP866025403 * (Tb - T6);
Chris@82 355 Tc = T6 + Tb;
Chris@82 356 T15 = FNMS(KP500000000, Tc, T1);
Chris@82 357 T1V = T16 + T17;
Chris@82 358 T22 = FNMS(KP500000000, T1V, T1W);
Chris@82 359 }
Chris@82 360 { /* group of elements 9, 1, 5 -> T1o, T1D, T12, T1l, T1F, T1G */
Chris@82 361 E T11, T1n, TW, T1m;
Chris@82 362 { /* element 9 combined with twiddle pair W[16], W[17] */
Chris@82 363 E TO, TQ, TN, TP;
Chris@82 364 TO = ri[WS(rs, 9)];
Chris@82 365 TQ = ii[WS(rs, 9)];
Chris@82 366 TN = W[16];
Chris@82 367 TP = W[17];
Chris@82 368 TR = FMA(TN, TO, TP * TQ);
Chris@82 369 T1E = FNMS(TP, TO, TN * TQ);
Chris@82 370 }
Chris@82 371 { /* element 5 combined with twiddle pair W[8], W[9] */
Chris@82 372 E TY, T10, TX, TZ;
Chris@82 373 TY = ri[WS(rs, 5)];
Chris@82 374 T10 = ii[WS(rs, 5)];
Chris@82 375 TX = W[8];
Chris@82 376 TZ = W[9];
Chris@82 377 T11 = FMA(TX, TY, TZ * T10);
Chris@82 378 T1n = FNMS(TZ, TY, TX * T10);
Chris@82 379 }
Chris@82 380 { /* element 1 combined with twiddle pair W[0], W[1] */
Chris@82 381 E TT, TV, TS, TU;
Chris@82 382 TT = ri[WS(rs, 1)];
Chris@82 383 TV = ii[WS(rs, 1)];
Chris@82 384 TS = W[0];
Chris@82 385 TU = W[1];
Chris@82 386 TW = FMA(TS, TT, TU * TV);
Chris@82 387 T1m = FNMS(TU, TT, TS * TV);
Chris@82 388 }
Chris@82 389 T1o = KP866025403 * (T1m - T1n);
Chris@82 390 T1D = KP866025403 * (T11 - TW);
Chris@82 391 T12 = TW + T11;
Chris@82 392 T1l = FNMS(KP500000000, T12, TR);
Chris@82 393 T1F = T1m + T1n;
Chris@82 394 T1G = FNMS(KP500000000, T1F, T1E);
Chris@82 395 }
Chris@82 396 { /* group of elements 6, 2, 10 -> T1d, T24, Tt, T1a, T1T, T25 */
Chris@82 397 E Ts, T1c, Tn, T1b;
Chris@82 398 { /* element 6 combined with twiddle pair W[10], W[11] */
Chris@82 399 E Tf, Th, Te, Tg;
Chris@82 400 Tf = ri[WS(rs, 6)];
Chris@82 401 Th = ii[WS(rs, 6)];
Chris@82 402 Te = W[10];
Chris@82 403 Tg = W[11];
Chris@82 404 Ti = FMA(Te, Tf, Tg * Th);
Chris@82 405 T1S = FNMS(Tg, Tf, Te * Th);
Chris@82 406 }
Chris@82 407 { /* element 2 combined with twiddle pair W[2], W[3] */
Chris@82 408 E Tp, Tr, To, Tq;
Chris@82 409 Tp = ri[WS(rs, 2)];
Chris@82 410 Tr = ii[WS(rs, 2)];
Chris@82 411 To = W[2];
Chris@82 412 Tq = W[3];
Chris@82 413 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 414 T1c = FNMS(Tq, Tp, To * Tr);
Chris@82 415 }
Chris@82 416 { /* element 10 combined with twiddle pair W[18], W[19] */
Chris@82 417 E Tk, Tm, Tj, Tl;
Chris@82 418 Tk = ri[WS(rs, 10)];
Chris@82 419 Tm = ii[WS(rs, 10)];
Chris@82 420 Tj = W[18];
Chris@82 421 Tl = W[19];
Chris@82 422 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 423 T1b = FNMS(Tl, Tk, Tj * Tm);
Chris@82 424 }
Chris@82 425 T1d = KP866025403 * (T1b - T1c);
Chris@82 426 T24 = KP866025403 * (Ts - Tn);
Chris@82 427 Tt = Tn + Ts;
Chris@82 428 T1a = FNMS(KP500000000, Tt, Ti);
Chris@82 429 T1T = T1b + T1c;
Chris@82 430 T25 = FNMS(KP500000000, T1T, T1S);
Chris@82 431 }
Chris@82 432 { /* group of elements 3, 11, 7 -> T1j, T1y, TL, T1g, T1A, T1B */
Chris@82 433 E TK, T1i, TF, T1h;
Chris@82 434 { /* element 3 combined with twiddle pair W[4], W[5] */
Chris@82 435 E Tx, Tz, Tw, Ty;
Chris@82 436 Tx = ri[WS(rs, 3)];
Chris@82 437 Tz = ii[WS(rs, 3)];
Chris@82 438 Tw = W[4];
Chris@82 439 Ty = W[5];
Chris@82 440 TA = FMA(Tw, Tx, Ty * Tz);
Chris@82 441 T1z = FNMS(Ty, Tx, Tw * Tz);
Chris@82 442 }
Chris@82 443 { /* element 11 combined with twiddle pair W[20], W[21] */
Chris@82 444 E TH, TJ, TG, TI;
Chris@82 445 TH = ri[WS(rs, 11)];
Chris@82 446 TJ = ii[WS(rs, 11)];
Chris@82 447 TG = W[20];
Chris@82 448 TI = W[21];
Chris@82 449 TK = FMA(TG, TH, TI * TJ);
Chris@82 450 T1i = FNMS(TI, TH, TG * TJ);
Chris@82 451 }
Chris@82 452 { /* element 7 combined with twiddle pair W[12], W[13] */
Chris@82 453 E TC, TE, TB, TD;
Chris@82 454 TC = ri[WS(rs, 7)];
Chris@82 455 TE = ii[WS(rs, 7)];
Chris@82 456 TB = W[12];
Chris@82 457 TD = W[13];
Chris@82 458 TF = FMA(TB, TC, TD * TE);
Chris@82 459 T1h = FNMS(TD, TC, TB * TE);
Chris@82 460 }
Chris@82 461 T1j = KP866025403 * (T1h - T1i);
Chris@82 462 T1y = KP866025403 * (TK - TF);
Chris@82 463 TL = TF + TK;
Chris@82 464 T1g = FNMS(KP500000000, TL, TA);
Chris@82 465 T1A = T1h + T1i;
Chris@82 466 T1B = FNMS(KP500000000, T1A, T1z);
Chris@82 467 }
Chris@82 468 { /* outputs 0, 3, 6 and 9: built from the plain group sums */
Chris@82 469 E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R;
Chris@82 470 {
Chris@82 471 E Td, Tu, T1U, T1X;
Chris@82 472 Td = T1 + Tc;
Chris@82 473 Tu = Ti + Tt;
Chris@82 474 Tv = Td + Tu;
Chris@82 475 T1N = Td - Tu;
Chris@82 476 T1U = T1S + T1T;
Chris@82 477 T1X = T1V + T1W;
Chris@82 478 T1Y = T1U + T1X;
Chris@82 479 T20 = T1X - T1U;
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E TM, T13, T1O, T1P;
Chris@82 483 TM = TA + TL;
Chris@82 484 T13 = TR + T12;
Chris@82 485 T14 = TM + T13;
Chris@82 486 T1Z = TM - T13;
Chris@82 487 T1O = T1z + T1A;
Chris@82 488 T1P = T1E + T1F;
Chris@82 489 T1Q = T1O - T1P;
Chris@82 490 T1R = T1O + T1P;
Chris@82 491 }
Chris@82 492 ri[WS(rs, 6)] = Tv - T14;
Chris@82 493 ii[WS(rs, 6)] = T1Y - T1R;
Chris@82 494 ri[0] = Tv + T14;
Chris@82 495 ii[0] = T1R + T1Y;
Chris@82 496 ri[WS(rs, 3)] = T1N - T1Q;
Chris@82 497 ii[WS(rs, 3)] = T1Z + T20;
Chris@82 498 ri[WS(rs, 9)] = T1N + T1Q;
Chris@82 499 ii[WS(rs, 9)] = T20 - T1Z;
Chris@82 500 }
Chris@82 501 { /* outputs 1, 4, 7 and 10: half-scaled terms plus the KP866025403 terms */
Chris@82 502 E T1t, T1x, T27, T2a, T1w, T28, T1I, T29;
Chris@82 503 {
Chris@82 504 E T1r, T1s, T23, T26;
Chris@82 505 T1r = T15 + T18;
Chris@82 506 T1s = T1a + T1d;
Chris@82 507 T1t = T1r + T1s;
Chris@82 508 T1x = T1r - T1s;
Chris@82 509 T23 = T21 + T22;
Chris@82 510 T26 = T24 + T25;
Chris@82 511 T27 = T23 - T26;
Chris@82 512 T2a = T26 + T23;
Chris@82 513 }
Chris@82 514 {
Chris@82 515 E T1u, T1v, T1C, T1H;
Chris@82 516 T1u = T1g + T1j;
Chris@82 517 T1v = T1l + T1o;
Chris@82 518 T1w = T1u + T1v;
Chris@82 519 T28 = T1u - T1v;
Chris@82 520 T1C = T1y + T1B;
Chris@82 521 T1H = T1D + T1G;
Chris@82 522 T1I = T1C - T1H;
Chris@82 523 T29 = T1C + T1H;
Chris@82 524 }
Chris@82 525 ri[WS(rs, 10)] = T1t - T1w;
Chris@82 526 ii[WS(rs, 10)] = T2a - T29;
Chris@82 527 ri[WS(rs, 4)] = T1t + T1w;
Chris@82 528 ii[WS(rs, 4)] = T29 + T2a;
Chris@82 529 ri[WS(rs, 7)] = T1x - T1I;
Chris@82 530 ii[WS(rs, 7)] = T28 + T27;
Chris@82 531 ri[WS(rs, 1)] = T1x + T1I;
Chris@82 532 ii[WS(rs, 1)] = T27 - T28;
Chris@82 533 }
Chris@82 534 { /* outputs 2, 5, 8 and 11: half-scaled terms minus the KP866025403 terms */
Chris@82 535 E T1f, T1J, T2d, T2f, T1q, T2g, T1M, T2e;
Chris@82 536 {
Chris@82 537 E T19, T1e, T2b, T2c;
Chris@82 538 T19 = T15 - T18;
Chris@82 539 T1e = T1a - T1d;
Chris@82 540 T1f = T19 + T1e;
Chris@82 541 T1J = T19 - T1e;
Chris@82 542 T2b = T25 - T24;
Chris@82 543 T2c = T22 - T21;
Chris@82 544 T2d = T2b + T2c;
Chris@82 545 T2f = T2c - T2b;
Chris@82 546 }
Chris@82 547 {
Chris@82 548 E T1k, T1p, T1K, T1L;
Chris@82 549 T1k = T1g - T1j;
Chris@82 550 T1p = T1l - T1o;
Chris@82 551 T1q = T1k + T1p;
Chris@82 552 T2g = T1k - T1p;
Chris@82 553 T1K = T1B - T1y;
Chris@82 554 T1L = T1G - T1D;
Chris@82 555 T1M = T1K - T1L;
Chris@82 556 T2e = T1K + T1L;
Chris@82 557 }
Chris@82 558 ri[WS(rs, 2)] = T1f - T1q;
Chris@82 559 ii[WS(rs, 2)] = T2d - T2e;
Chris@82 560 ri[WS(rs, 8)] = T1f + T1q;
Chris@82 561 ii[WS(rs, 8)] = T2e + T2d;
Chris@82 562 ri[WS(rs, 11)] = T1J - T1M;
Chris@82 563 ii[WS(rs, 11)] = T2g + T2f;
Chris@82 564 ri[WS(rs, 5)] = T1J + T1M;
Chris@82 565 ii[WS(rs, 5)] = T2f - T2g;
Chris@82 566 }
Chris@82 567 }
Chris@82 568 }
Chris@82 569 }
Chris@82 570
/*
 * Twiddle instruction table referenced by the ct_desc below (non-FMA
 * branch; same contents as in the FMA branch).
 * NOTE(review): TW_FULL, TW_NEXT and tw_instr are defined in headers not
 * visible here. Presumably the first entry requests the full twiddle set
 * for size 12 and the second terminates the list -- confirm against the
 * project headers.
 */
Chris@82 571 static const tw_instr twinstr[] = {
Chris@82 572 {TW_FULL, 0, 12},
Chris@82 573 {TW_NEXT, 1, 0}
Chris@82 574 };
Chris@82 575
/*
 * Descriptor handed to the registration call for the non-FMA variant.
 * 12 and "t1_12" match the generator's -n and -name options, and
 * {88, 30, 30, 0} matches the add / multiply / fused-multiply-add counts
 * quoted in the generator statistics comment for this branch.
 * NOTE(review): the meaning of GENUS, the fourth count and the three
 * trailing zeros is defined by ct_desc in headers not visible here.
 */
Chris@82 576 static const ct_desc desc = { 12, "t1_12", twinstr, &GENUS, {88, 30, 30, 0}, 0, 0, 0 };
Chris@82 577
/*
 * Registration entry point (non-FMA branch): passes the t1_12 kernel and
 * its descriptor to kdft_dit_register for planner p.
 * NOTE(review): X(...) is a macro from project headers, presumably a
 * name-prefixing wrapper -- confirm.
 */
Chris@82 578 void X(codelet_t1_12) (planner *p) {
Chris@82 579 X(kdft_dit_register) (p, t1_12, &desc);
Chris@82 580 }
Chris@82 581 #endif