annotate src/fftw-3.3.8/dft/scalar/codelets/t1_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:13 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 96 FP additions, 88 FP multiplications,
Chris@82 32 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
Chris@82 33 * 55 stack variables, 10 constants, and 36 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t1_9 -- variant selected when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined.
 *
 * For every m in [mb, me) the loop body reads nine points from ri[]/ii[]
 * (index 0 and WS(rs, k) for k = 1..8), scales points 1..8 by the pair
 * (W[2k-2], W[2k-1]), combines them, and stores nine results back to the
 * same ri[]/ii[] locations (the transform is in place).
 *
 *   ri, ii : input/output arrays; the arithmetic treats ri as the real part
 *            and ii as the imaginary part of each point.
 *   W      : twiddle table; 16 values are consumed per iteration, and the
 *            pointer is first advanced by mb * 16.
 *   rs     : stride handed to WS() to locate point k.
 *   mb, me : half-open range of iterations.
 *   ms     : amount added to ri and ii after each iteration.
 *
 * NOTE(review): DK, FMA, FNMS, WS, MAKE_VOLATILE_STRIDE and the types R, E,
 * INT and stride are defined outside this view.  The comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- TODO confirm against
 * the included headers.
 */
Chris@82 37 static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants; KP500000000 is 1/2 and KP866025403 is approximately sqrt(3)/2. */
Chris@82 39 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@82 40 DK(KP492403876, +0.492403876506104029683371512294761506835321626);
Chris@82 41 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 42 DK(KP954188894, +0.954188894138671133499268364187245676532219158);
Chris@82 43 DK(KP363970234, +0.363970234266202361351047882776834043890471784);
Chris@82 44 DK(KP777861913, +0.777861913430206160028177977318626690410586096);
Chris@82 45 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@82 46 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@82 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 49 {
Chris@82 50 INT m;
/* One butterfly per m; ri/ii step by ms and W steps by 16 between iterations. */
/* NOTE(review): the effect of MAKE_VOLATILE_STRIDE(18, rs) is not visible here. */
Chris@82 51 for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@82 52 E T1, T1R, Te, T1W, T10, T1Q, T1l, T1r, Ty, T1p, Tl, T1o, T1g, T1q, T1a;
Chris@82 53 E T1d, TS, T18, TF, T13, T19, T1c;
/* Point 0 is loaded without any twiddle multiplication. */
Chris@82 54 T1 = ri[0];
Chris@82 55 T1R = ii[0];
/* Points 3 and 6: scale by (W[4], W[5]) and (W[10], W[11]), then form sums and differences. */
Chris@82 56 {
Chris@82 57 E T3, T6, T4, TW, T9, Tc, Ta, TY, T2, T8;
Chris@82 58 T3 = ri[WS(rs, 3)];
Chris@82 59 T6 = ii[WS(rs, 3)];
Chris@82 60 T2 = W[4];
Chris@82 61 T4 = T2 * T3;
Chris@82 62 TW = T2 * T6;
Chris@82 63 T9 = ri[WS(rs, 6)];
Chris@82 64 Tc = ii[WS(rs, 6)];
Chris@82 65 T8 = W[10];
Chris@82 66 Ta = T8 * T9;
Chris@82 67 TY = T8 * Tc;
Chris@82 68 {
Chris@82 69 E T7, TX, Td, TZ, T5, Tb;
Chris@82 70 T5 = W[5];
/* Under the assumed macro semantics: T7 = W[4]*ri3 + W[5]*ii3, TX = W[4]*ii3 - W[5]*ri3. */
Chris@82 71 T7 = FMA(T5, T6, T4);
Chris@82 72 TX = FNMS(T5, T3, TW);
Chris@82 73 Tb = W[11];
Chris@82 74 Td = FMA(Tb, Tc, Ta);
Chris@82 75 TZ = FNMS(Tb, T9, TY);
Chris@82 76 Te = T7 + Td;
Chris@82 77 T1W = Td - T7;
Chris@82 78 T10 = TX - TZ;
Chris@82 79 T1Q = TX + TZ;
Chris@82 80 }
Chris@82 81 }
/* Points 1, 4 and 7: scale by (W[0], W[1]), (W[6], W[7]) and (W[12], W[13]). */
Chris@82 82 {
Chris@82 83 E Th, Tk, Ti, T1n, Tx, T1i, Tr, T1k, Tg, Tj;
Chris@82 84 Th = ri[WS(rs, 1)];
Chris@82 85 Tk = ii[WS(rs, 1)];
Chris@82 86 Tg = W[0];
Chris@82 87 Ti = Tg * Th;
Chris@82 88 T1n = Tg * Tk;
Chris@82 89 {
Chris@82 90 E Tt, Tw, Tu, T1h, Ts, Tv;
Chris@82 91 Tt = ri[WS(rs, 7)];
Chris@82 92 Tw = ii[WS(rs, 7)];
Chris@82 93 Ts = W[12];
Chris@82 94 Tu = Ts * Tt;
Chris@82 95 T1h = Ts * Tw;
Chris@82 96 Tv = W[13];
Chris@82 97 Tx = FMA(Tv, Tw, Tu);
Chris@82 98 T1i = FNMS(Tv, Tt, T1h);
Chris@82 99 }
Chris@82 100 {
Chris@82 101 E Tn, Tq, To, T1j, Tm, Tp;
Chris@82 102 Tn = ri[WS(rs, 4)];
Chris@82 103 Tq = ii[WS(rs, 4)];
Chris@82 104 Tm = W[6];
Chris@82 105 To = Tm * Tn;
Chris@82 106 T1j = Tm * Tq;
Chris@82 107 Tp = W[7];
Chris@82 108 Tr = FMA(Tp, Tq, To);
Chris@82 109 T1k = FNMS(Tp, Tn, T1j);
Chris@82 110 }
/* Sums and differences of the scaled points 4 and 7, then combination with scaled point 1. */
Chris@82 111 T1l = T1i - T1k;
Chris@82 112 T1r = Tr - Tx;
Chris@82 113 Ty = Tr + Tx;
Chris@82 114 T1p = T1k + T1i;
Chris@82 115 Tj = W[1];
Chris@82 116 Tl = FMA(Tj, Tk, Ti);
Chris@82 117 T1o = FNMS(Tj, Th, T1n);
Chris@82 118 T1g = FNMS(KP500000000, Ty, Tl);
Chris@82 119 T1q = FNMS(KP500000000, T1p, T1o);
Chris@82 120 }
/* Points 2, 5 and 8: scale by (W[2], W[3]), (W[8], W[9]) and (W[14], W[15]). */
Chris@82 121 {
Chris@82 122 E TB, TE, TC, T12, TR, T17, TL, T15, TA, TD;
Chris@82 123 TB = ri[WS(rs, 2)];
Chris@82 124 TE = ii[WS(rs, 2)];
Chris@82 125 TA = W[2];
Chris@82 126 TC = TA * TB;
Chris@82 127 T12 = TA * TE;
Chris@82 128 {
Chris@82 129 E TN, TQ, TO, T16, TM, TP;
Chris@82 130 TN = ri[WS(rs, 8)];
Chris@82 131 TQ = ii[WS(rs, 8)];
Chris@82 132 TM = W[14];
Chris@82 133 TO = TM * TN;
Chris@82 134 T16 = TM * TQ;
Chris@82 135 TP = W[15];
Chris@82 136 TR = FMA(TP, TQ, TO);
Chris@82 137 T17 = FNMS(TP, TN, T16);
Chris@82 138 }
Chris@82 139 {
Chris@82 140 E TH, TK, TI, T14, TG, TJ;
Chris@82 141 TH = ri[WS(rs, 5)];
Chris@82 142 TK = ii[WS(rs, 5)];
Chris@82 143 TG = W[8];
Chris@82 144 TI = TG * TH;
Chris@82 145 T14 = TG * TK;
Chris@82 146 TJ = W[9];
Chris@82 147 TL = FMA(TJ, TK, TI);
Chris@82 148 T15 = FNMS(TJ, TH, T14);
Chris@82 149 }
/* Sums and differences of the scaled points 5 and 8, then combination with scaled point 2. */
Chris@82 150 T1a = TR - TL;
Chris@82 151 T1d = T15 - T17;
Chris@82 152 TS = TL + TR;
Chris@82 153 T18 = T15 + T17;
Chris@82 154 TD = W[3];
Chris@82 155 TF = FMA(TD, TE, TC);
Chris@82 156 T13 = FNMS(TD, TB, T12);
Chris@82 157 T19 = FNMS(KP500000000, T18, T13);
Chris@82 158 T1c = FNMS(KP500000000, TS, TF);
Chris@82 159 }
/* Outputs 0, 3 and 6 (both ri and ii) are produced from the three group sums. */
Chris@82 160 {
Chris@82 161 E Tf, T1S, TU, T1U, T1O, T1P, T1L, T1T;
Chris@82 162 Tf = T1 + Te;
Chris@82 163 T1S = T1Q + T1R;
Chris@82 164 {
Chris@82 165 E Tz, TT, T1M, T1N;
Chris@82 166 Tz = Tl + Ty;
Chris@82 167 TT = TF + TS;
Chris@82 168 TU = Tz + TT;
Chris@82 169 T1U = TT - Tz;
Chris@82 170 T1M = T1o + T1p;
Chris@82 171 T1N = T13 + T18;
Chris@82 172 T1O = T1M - T1N;
Chris@82 173 T1P = T1M + T1N;
Chris@82 174 }
Chris@82 175 ri[0] = Tf + TU;
Chris@82 176 ii[0] = T1P + T1S;
Chris@82 177 T1L = FNMS(KP500000000, TU, Tf);
Chris@82 178 ri[WS(rs, 6)] = FNMS(KP866025403, T1O, T1L);
Chris@82 179 ri[WS(rs, 3)] = FMA(KP866025403, T1O, T1L);
Chris@82 180 T1T = FNMS(KP500000000, T1P, T1S);
Chris@82 181 ii[WS(rs, 3)] = FMA(KP866025403, T1U, T1T);
Chris@82 182 ii[WS(rs, 6)] = FNMS(KP866025403, T1U, T1T);
Chris@82 183 }
/* Outputs 1, 2, 4, 5, 7 and 8 are produced from the group differences computed above. */
Chris@82 184 {
Chris@82 185 E T11, T1z, T1X, T21, T1f, T1w, T1t, T1x, T1u, T1Y, T1C, T1I, T1F, T1J, T1G;
Chris@82 186 E T22, TV, T1V;
Chris@82 187 TV = FNMS(KP500000000, Te, T1);
Chris@82 188 T11 = FMA(KP866025403, T10, TV);
Chris@82 189 T1z = FNMS(KP866025403, T10, TV);
Chris@82 190 T1V = FNMS(KP500000000, T1Q, T1R);
Chris@82 191 T1X = FMA(KP866025403, T1W, T1V);
Chris@82 192 T21 = FNMS(KP866025403, T1W, T1V);
Chris@82 193 {
Chris@82 194 E T1b, T1e, T1m, T1s;
Chris@82 195 T1b = FMA(KP866025403, T1a, T19);
Chris@82 196 T1e = FMA(KP866025403, T1d, T1c);
Chris@82 197 T1f = FMA(KP176326980, T1e, T1b);
Chris@82 198 T1w = FNMS(KP176326980, T1b, T1e);
Chris@82 199 T1m = FNMS(KP866025403, T1l, T1g);
Chris@82 200 T1s = FNMS(KP866025403, T1r, T1q);
Chris@82 201 T1t = FMA(KP839099631, T1s, T1m);
Chris@82 202 T1x = FNMS(KP839099631, T1m, T1s);
Chris@82 203 }
Chris@82 204 T1u = FMA(KP777861913, T1t, T1f);
Chris@82 205 T1Y = FNMS(KP777861913, T1x, T1w);
Chris@82 206 {
Chris@82 207 E T1A, T1B, T1D, T1E;
Chris@82 208 T1A = FMA(KP866025403, T1r, T1q);
Chris@82 209 T1B = FMA(KP866025403, T1l, T1g);
Chris@82 210 T1C = FMA(KP176326980, T1B, T1A);
Chris@82 211 T1I = FNMS(KP176326980, T1A, T1B);
Chris@82 212 T1D = FNMS(KP866025403, T1d, T1c);
Chris@82 213 T1E = FNMS(KP866025403, T1a, T19);
Chris@82 214 T1F = FNMS(KP363970234, T1E, T1D);
Chris@82 215 T1J = FMA(KP363970234, T1D, T1E);
Chris@82 216 }
Chris@82 217 T1G = FNMS(KP954188894, T1F, T1C);
Chris@82 218 T22 = FMA(KP954188894, T1J, T1I);
Chris@82 219 ri[WS(rs, 1)] = FMA(KP984807753, T1u, T11);
Chris@82 220 ii[WS(rs, 1)] = FNMS(KP984807753, T1Y, T1X);
Chris@82 221 ri[WS(rs, 2)] = FMA(KP984807753, T1G, T1z);
Chris@82 222 ii[WS(rs, 2)] = FNMS(KP984807753, T22, T21);
/* Outputs 4 and 7 reuse T1u / T1Y from the output-1 computation. */
Chris@82 223 {
Chris@82 224 E T1v, T1y, T1Z, T20;
Chris@82 225 T1v = FNMS(KP492403876, T1u, T11);
Chris@82 226 T1y = FMA(KP777861913, T1x, T1w);
Chris@82 227 ri[WS(rs, 4)] = FMA(KP852868531, T1y, T1v);
Chris@82 228 ri[WS(rs, 7)] = FNMS(KP852868531, T1y, T1v);
Chris@82 229 T1Z = FMA(KP492403876, T1Y, T1X);
Chris@82 230 T20 = FNMS(KP777861913, T1t, T1f);
Chris@82 231 ii[WS(rs, 4)] = FMA(KP852868531, T20, T1Z);
Chris@82 232 ii[WS(rs, 7)] = FNMS(KP852868531, T20, T1Z);
Chris@82 233 }
/* Outputs 5 and 8 reuse T1G / T22 from the output-2 computation. */
Chris@82 234 {
Chris@82 235 E T1H, T1K, T23, T24;
Chris@82 236 T1H = FNMS(KP492403876, T1G, T1z);
Chris@82 237 T1K = FNMS(KP954188894, T1J, T1I);
Chris@82 238 ri[WS(rs, 5)] = FNMS(KP852868531, T1K, T1H);
Chris@82 239 ri[WS(rs, 8)] = FMA(KP852868531, T1K, T1H);
Chris@82 240 T23 = FMA(KP492403876, T22, T21);
Chris@82 241 T24 = FMA(KP954188894, T1F, T1C);
Chris@82 242 ii[WS(rs, 5)] = FNMS(KP852868531, T24, T23);
Chris@82 243 ii[WS(rs, 8)] = FMA(KP852868531, T24, T23);
Chris@82 244 }
Chris@82 245 }
Chris@82 246 }
Chris@82 247 }
Chris@82 248 }
Chris@82 249
/*
 * Twiddle-instruction table referenced by the codelet descriptor below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined outside this view;
 * presumably {TW_FULL, 0, 9} requests the full twiddle set for size 9 and
 * {TW_NEXT, 1, 0} terminates the list -- confirm against their definitions.
 */
Chris@82 250 static const tw_instr twinstr[] = {
Chris@82 251 {TW_FULL, 0, 9},
Chris@82 252 {TW_NEXT, 1, 0}
Chris@82 253 };
Chris@82 254
/*
 * Descriptor passed at registration: size 9, name "t1_9", the twiddle
 * instruction table, and &GENUS.  The tuple {24, 16, 72, 0} matches the
 * additions / multiplications / fused multiply-add counts quoted in the
 * generator comment for this variant.
 * NOTE(review): the meaning of the three trailing zeros depends on the
 * ct_desc layout, which is declared outside this view.
 */
Chris@82 255 static const ct_desc desc = { 9, "t1_9", twinstr, &GENUS, {24, 16, 72, 0}, 0, 0, 0 };
Chris@82 256
/*
 * Registration entry point: hands the t1_9 kernel and its descriptor to the
 * planner p through X(kdft_dit_register).
 * NOTE(review): X() is a name-wrapping macro defined elsewhere; the exported
 * symbol name depends on its expansion.
 */
Chris@82 257 void X(codelet_t1_9) (planner *p) {
Chris@82 258 X(kdft_dit_register) (p, t1_9, &desc);
Chris@82 259 }
Chris@82 260 #else
Chris@82 261
Chris@82 262 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include dft/scalar/t.h */
Chris@82 263
Chris@82 264 /*
Chris@82 265 * This function contains 96 FP additions, 72 FP multiplications,
Chris@82 266 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
Chris@82 267 * 41 stack variables, 8 constants, and 36 memory accesses
Chris@82 268 */
Chris@82 269 #include "dft/scalar/t.h"
Chris@82 270
/*
 * t1_9 -- variant selected when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) the loop body reads nine points from ri[]/ii[]
 * (index 0 and WS(rs, k) for k = 1..8), scales points 1..8 by the pair
 * (W[2k-2], W[2k-1]), combines them, and stores nine results back to the
 * same ri[]/ii[] locations (the transform is in place).
 *
 *   ri, ii : input/output arrays; the arithmetic treats ri as the real part
 *            and ii as the imaginary part of each point.
 *   W      : twiddle table; 16 values are consumed per iteration, and the
 *            pointer is first advanced by mb * 16.
 *   rs     : stride handed to WS() to locate point k.
 *   mb, me : half-open range of iterations.
 *   ms     : amount added to ri and ii after each iteration.
 *
 * NOTE(review): DK, FMA, FNMS, WS, MAKE_VOLATILE_STRIDE and the types R, E,
 * INT and stride are defined outside this view.  The comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- TODO confirm against
 * the included headers.
 */
Chris@82 271 static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 272 {
/* Numeric constants; KP500000000 is 1/2 and KP866025403 is approximately sqrt(3)/2. */
Chris@82 273 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@82 274 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@82 275 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 276 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@82 277 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@82 278 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@82 279 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 280 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 281 {
Chris@82 282 INT m;
/* One butterfly per m; ri/ii step by ms and W steps by 16 between iterations. */
/* NOTE(review): the effect of MAKE_VOLATILE_STRIDE(18, rs) is not visible here. */
Chris@82 283 for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@82 284 E T1, T1B, TQ, T1G, Tc, TN, T1A, T1H, TL, T1x, T17, T1o, T1c, T1n, Tu;
Chris@82 285 E T1w, TW, T1k, T11, T1l;
/* Points 0, 3 and 6: point 0 is loaded as-is; 3 and 6 are scaled by (W[4], W[5]) and (W[10], W[11]). */
Chris@82 286 {
Chris@82 287 E T6, TO, Tb, TP;
Chris@82 288 T1 = ri[0];
Chris@82 289 T1B = ii[0];
Chris@82 290 {
Chris@82 291 E T3, T5, T2, T4;
Chris@82 292 T3 = ri[WS(rs, 3)];
Chris@82 293 T5 = ii[WS(rs, 3)];
Chris@82 294 T2 = W[4];
Chris@82 295 T4 = W[5];
/* Under the assumed macro semantics: T6 = W[4]*ri3 + W[5]*ii3, TO = W[4]*ii3 - W[5]*ri3. */
Chris@82 296 T6 = FMA(T2, T3, T4 * T5);
Chris@82 297 TO = FNMS(T4, T3, T2 * T5);
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T8, Ta, T7, T9;
Chris@82 301 T8 = ri[WS(rs, 6)];
Chris@82 302 Ta = ii[WS(rs, 6)];
Chris@82 303 T7 = W[10];
Chris@82 304 T9 = W[11];
Chris@82 305 Tb = FMA(T7, T8, T9 * Ta);
Chris@82 306 TP = FNMS(T9, T8, T7 * Ta);
Chris@82 307 }
Chris@82 308 TQ = KP866025403 * (TO - TP);
Chris@82 309 T1G = KP866025403 * (Tb - T6);
Chris@82 310 Tc = T6 + Tb;
Chris@82 311 TN = FNMS(KP500000000, Tc, T1);
Chris@82 312 T1A = TO + TP;
Chris@82 313 T1H = FNMS(KP500000000, T1A, T1B);
Chris@82 314 }
/* Points 2, 5 and 8: scale by (W[2], W[3]), (W[8], W[9]) and (W[14], W[15]), then combine. */
Chris@82 315 {
Chris@82 316 E Tz, T19, TE, T14, TJ, T15, TK, T1a;
Chris@82 317 {
Chris@82 318 E Tw, Ty, Tv, Tx;
Chris@82 319 Tw = ri[WS(rs, 2)];
Chris@82 320 Ty = ii[WS(rs, 2)];
Chris@82 321 Tv = W[2];
Chris@82 322 Tx = W[3];
Chris@82 323 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 324 T19 = FNMS(Tx, Tw, Tv * Ty);
Chris@82 325 }
Chris@82 326 {
Chris@82 327 E TB, TD, TA, TC;
Chris@82 328 TB = ri[WS(rs, 5)];
Chris@82 329 TD = ii[WS(rs, 5)];
Chris@82 330 TA = W[8];
Chris@82 331 TC = W[9];
Chris@82 332 TE = FMA(TA, TB, TC * TD);
Chris@82 333 T14 = FNMS(TC, TB, TA * TD);
Chris@82 334 }
Chris@82 335 {
Chris@82 336 E TG, TI, TF, TH;
Chris@82 337 TG = ri[WS(rs, 8)];
Chris@82 338 TI = ii[WS(rs, 8)];
Chris@82 339 TF = W[14];
Chris@82 340 TH = W[15];
Chris@82 341 TJ = FMA(TF, TG, TH * TI);
Chris@82 342 T15 = FNMS(TH, TG, TF * TI);
Chris@82 343 }
Chris@82 344 TK = TE + TJ;
Chris@82 345 T1a = T14 + T15;
Chris@82 346 TL = Tz + TK;
Chris@82 347 T1x = T19 + T1a;
Chris@82 348 {
Chris@82 349 E T13, T16, T18, T1b;
Chris@82 350 T13 = FNMS(KP500000000, TK, Tz);
Chris@82 351 T16 = KP866025403 * (T14 - T15);
Chris@82 352 T17 = T13 + T16;
Chris@82 353 T1o = T13 - T16;
Chris@82 354 T18 = KP866025403 * (TJ - TE);
Chris@82 355 T1b = FNMS(KP500000000, T1a, T19);
Chris@82 356 T1c = T18 + T1b;
Chris@82 357 T1n = T1b - T18;
Chris@82 358 }
Chris@82 359 }
/* Points 1, 4 and 7: scale by (W[0], W[1]), (W[6], W[7]) and (W[12], W[13]), then combine. */
Chris@82 360 {
Chris@82 361 E Ti, TY, Tn, TT, Ts, TU, Tt, TZ;
Chris@82 362 {
Chris@82 363 E Tf, Th, Te, Tg;
Chris@82 364 Tf = ri[WS(rs, 1)];
Chris@82 365 Th = ii[WS(rs, 1)];
Chris@82 366 Te = W[0];
Chris@82 367 Tg = W[1];
Chris@82 368 Ti = FMA(Te, Tf, Tg * Th);
Chris@82 369 TY = FNMS(Tg, Tf, Te * Th);
Chris@82 370 }
Chris@82 371 {
Chris@82 372 E Tk, Tm, Tj, Tl;
Chris@82 373 Tk = ri[WS(rs, 4)];
Chris@82 374 Tm = ii[WS(rs, 4)];
Chris@82 375 Tj = W[6];
Chris@82 376 Tl = W[7];
Chris@82 377 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 378 TT = FNMS(Tl, Tk, Tj * Tm);
Chris@82 379 }
Chris@82 380 {
Chris@82 381 E Tp, Tr, To, Tq;
Chris@82 382 Tp = ri[WS(rs, 7)];
Chris@82 383 Tr = ii[WS(rs, 7)];
Chris@82 384 To = W[12];
Chris@82 385 Tq = W[13];
Chris@82 386 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 387 TU = FNMS(Tq, Tp, To * Tr);
Chris@82 388 }
Chris@82 389 Tt = Tn + Ts;
Chris@82 390 TZ = TT + TU;
Chris@82 391 Tu = Ti + Tt;
Chris@82 392 T1w = TY + TZ;
Chris@82 393 {
Chris@82 394 E TS, TV, TX, T10;
Chris@82 395 TS = FNMS(KP500000000, Tt, Ti);
Chris@82 396 TV = KP866025403 * (TT - TU);
Chris@82 397 TW = TS + TV;
Chris@82 398 T1k = TS - TV;
Chris@82 399 TX = KP866025403 * (Ts - Tn);
Chris@82 400 T10 = FNMS(KP500000000, TZ, TY);
Chris@82 401 T11 = TX + T10;
Chris@82 402 T1l = T10 - TX;
Chris@82 403 }
Chris@82 404 }
/* ri outputs 0, 3 and 6, built from the three group sums. */
Chris@82 405 {
Chris@82 406 E T1y, Td, TM, T1v;
Chris@82 407 T1y = KP866025403 * (T1w - T1x);
Chris@82 408 Td = T1 + Tc;
Chris@82 409 TM = Tu + TL;
Chris@82 410 T1v = FNMS(KP500000000, TM, Td);
Chris@82 411 ri[0] = Td + TM;
Chris@82 412 ri[WS(rs, 3)] = T1v + T1y;
Chris@82 413 ri[WS(rs, 6)] = T1v - T1y;
Chris@82 414 }
/* ii outputs 0, 3 and 6, built from the three group sums. */
Chris@82 415 {
Chris@82 416 E T1D, T1z, T1C, T1E;
Chris@82 417 T1D = KP866025403 * (TL - Tu);
Chris@82 418 T1z = T1w + T1x;
Chris@82 419 T1C = T1A + T1B;
Chris@82 420 T1E = FNMS(KP500000000, T1z, T1C);
Chris@82 421 ii[0] = T1z + T1C;
Chris@82 422 ii[WS(rs, 6)] = T1E - T1D;
Chris@82 423 ii[WS(rs, 3)] = T1D + T1E;
Chris@82 424 }
/* Outputs 1, 4 and 7 (both ri and ii). */
Chris@82 425 {
Chris@82 426 E TR, T1I, T1e, T1J, T1i, T1F, T1f, T1K;
Chris@82 427 TR = TN + TQ;
Chris@82 428 T1I = T1G + T1H;
Chris@82 429 {
Chris@82 430 E T12, T1d, T1g, T1h;
Chris@82 431 T12 = FMA(KP766044443, TW, KP642787609 * T11);
Chris@82 432 T1d = FMA(KP173648177, T17, KP984807753 * T1c);
Chris@82 433 T1e = T12 + T1d;
Chris@82 434 T1J = KP866025403 * (T1d - T12);
Chris@82 435 T1g = FNMS(KP642787609, TW, KP766044443 * T11);
Chris@82 436 T1h = FNMS(KP984807753, T17, KP173648177 * T1c);
Chris@82 437 T1i = KP866025403 * (T1g - T1h);
Chris@82 438 T1F = T1g + T1h;
Chris@82 439 }
Chris@82 440 ri[WS(rs, 1)] = TR + T1e;
Chris@82 441 ii[WS(rs, 1)] = T1F + T1I;
Chris@82 442 T1f = FNMS(KP500000000, T1e, TR);
Chris@82 443 ri[WS(rs, 7)] = T1f - T1i;
Chris@82 444 ri[WS(rs, 4)] = T1f + T1i;
Chris@82 445 T1K = FNMS(KP500000000, T1F, T1I);
Chris@82 446 ii[WS(rs, 4)] = T1J + T1K;
Chris@82 447 ii[WS(rs, 7)] = T1K - T1J;
Chris@82 448 }
/* Outputs 2, 5 and 8 (both ri and ii). */
Chris@82 449 {
Chris@82 450 E T1j, T1M, T1q, T1N, T1u, T1L, T1r, T1O;
Chris@82 451 T1j = TN - TQ;
Chris@82 452 T1M = T1H - T1G;
Chris@82 453 {
Chris@82 454 E T1m, T1p, T1s, T1t;
Chris@82 455 T1m = FMA(KP173648177, T1k, KP984807753 * T1l);
Chris@82 456 T1p = FNMS(KP939692620, T1o, KP342020143 * T1n);
Chris@82 457 T1q = T1m + T1p;
Chris@82 458 T1N = KP866025403 * (T1p - T1m);
Chris@82 459 T1s = FNMS(KP984807753, T1k, KP173648177 * T1l);
Chris@82 460 T1t = FMA(KP342020143, T1o, KP939692620 * T1n);
Chris@82 461 T1u = KP866025403 * (T1s + T1t);
Chris@82 462 T1L = T1s - T1t;
Chris@82 463 }
Chris@82 464 ri[WS(rs, 2)] = T1j + T1q;
Chris@82 465 ii[WS(rs, 2)] = T1L + T1M;
Chris@82 466 T1r = FNMS(KP500000000, T1q, T1j);
Chris@82 467 ri[WS(rs, 8)] = T1r - T1u;
Chris@82 468 ri[WS(rs, 5)] = T1r + T1u;
Chris@82 469 T1O = FNMS(KP500000000, T1L, T1M);
Chris@82 470 ii[WS(rs, 5)] = T1N + T1O;
Chris@82 471 ii[WS(rs, 8)] = T1O - T1N;
Chris@82 472 }
Chris@82 473 }
Chris@82 474 }
Chris@82 475 }
Chris@82 476
/*
 * Twiddle-instruction table referenced by the codelet descriptor below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined outside this view;
 * presumably {TW_FULL, 0, 9} requests the full twiddle set for size 9 and
 * {TW_NEXT, 1, 0} terminates the list -- confirm against their definitions.
 */
Chris@82 477 static const tw_instr twinstr[] = {
Chris@82 478 {TW_FULL, 0, 9},
Chris@82 479 {TW_NEXT, 1, 0}
Chris@82 480 };
Chris@82 481
/*
 * Descriptor passed at registration: size 9, name "t1_9", the twiddle
 * instruction table, and &GENUS.  The tuple {60, 36, 36, 0} matches the
 * additions / multiplications / fused multiply-add counts quoted in the
 * generator comment for this variant.
 * NOTE(review): the meaning of the three trailing zeros depends on the
 * ct_desc layout, which is declared outside this view.
 */
Chris@82 482 static const ct_desc desc = { 9, "t1_9", twinstr, &GENUS, {60, 36, 36, 0}, 0, 0, 0 };
Chris@82 483
/*
 * Registration entry point: hands the t1_9 kernel and its descriptor to the
 * planner p through X(kdft_dit_register).
 * NOTE(review): X() is a name-wrapping macro defined elsewhere; the exported
 * symbol name depends on its expansion.
 */
Chris@82 484 void X(codelet_t1_9) (planner *p) {
Chris@82 485 X(kdft_dit_register) (p, t1_9, &desc);
Chris@82 486 }
Chris@82 487 #endif