annotate src/fftw-3.3.5/dft/scalar/codelets/t1_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:36:11 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include t.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 96 FP additions, 88 FP multiplications,
Chris@42 32 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
Chris@42 33 * 72 stack variables, 10 constants, and 36 memory accesses
Chris@42 34 */
Chris@42 35 #include "t.h"
Chris@42 36
/*
 * t1_9, HAVE_FMA variant: size-9 twiddle codelet emitted by genfft's
 * gen_twiddle (see the "Generated by" line above, -n 9).
 *
 * For every m in [mb, me) the loop body reads the nine complex points
 * ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..8, combines points 1..8 with the
 * pairs W[2k-2], W[2k-1], and stores nine results back into the same
 * ri/ii slots.  After each iteration ri and ii advance by ms and W
 * advances by 16 values (eight pairs).
 *
 * Parameters:
 *   ri, ii  real / imaginary arrays; read and then overwritten
 *   W       twiddle table; the first entry used is W + mb * 16
 *   rs      stride handed to WS() to address the nine points
 *   mb, me  half-open range of the iteration counter m
 *   ms      step added to ri and ii after each iteration
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are assumed to mean
 * a * b + c and c - a * b as defined in the project headers -- confirm.
 * The statement order is the generator's schedule; do not hand-edit.
 */
Chris@42 37 static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numeric constants used below; DK is not defined in this file. */
Chris@42 39 DK(KP954188894, +0.954188894138671133499268364187245676532219158);
Chris@42 40 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@42 41 DK(KP363970234, +0.363970234266202361351047882776834043890471784);
Chris@42 42 DK(KP492403876, +0.492403876506104029683371512294761506835321626);
Chris@42 43 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 44 DK(KP777861913, +0.777861913430206160028177977318626690410586096);
Chris@42 45 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@42 46 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@42 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 49 {
Chris@42 50 INT m;
/* W is first moved to the entries for iteration mb; each step then advances ri, ii by ms and W by 16. */
Chris@42 51 for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
/* These four are declared at loop scope because outputs 5 and 8 are stored after the inner blocks close. */
Chris@42 52 E T1K, T24, T1H, T23;
Chris@42 53 {
Chris@42 54 E T1, T1R, T1Q, T10, T1W, Te, TB, T1l, T1r, T1q, T1M, TE, T1g, Tz, T12;
Chris@42 55 E TC, TH, TK, T17, TR, TG, TJ, TD;
/* Point 0 is loaded as-is; no W entry is applied to it. */
Chris@42 56 T1 = ri[0];
Chris@42 57 T1R = ii[0];
Chris@42 58 {
Chris@42 59 E T9, Tc, TY, Ta, Tb, TX, T7;
Chris@42 60 {
Chris@42 61 E T3, T6, T8, TW, T4, T2, T5;
/* Points 3 and 6 with W[4], W[5] and W[10], W[11]. */
Chris@42 62 T3 = ri[WS(rs, 3)];
Chris@42 63 T6 = ii[WS(rs, 3)];
Chris@42 64 T2 = W[4];
Chris@42 65 T9 = ri[WS(rs, 6)];
Chris@42 66 Tc = ii[WS(rs, 6)];
Chris@42 67 T8 = W[10];
Chris@42 68 TW = T2 * T6;
Chris@42 69 T4 = T2 * T3;
Chris@42 70 T5 = W[5];
Chris@42 71 TY = T8 * Tc;
Chris@42 72 Ta = T8 * T9;
Chris@42 73 Tb = W[11];
Chris@42 74 TX = FNMS(T5, T3, TW);
Chris@42 75 T7 = FMA(T5, T6, T4);
Chris@42 76 }
Chris@42 77 {
Chris@42 78 E Th, Tk, Ti, T1n, Tn, Tq, Tp, T1i, Tx, T1j, To, Tj, TZ, Td, Tg;
Chris@42 79 E TA, Tl, Ty;
/* Finish points 3 and 6 (T1Q, T10, T1W, Te) while starting on points 1, 4 and 7. */
Chris@42 80 Th = ri[WS(rs, 1)];
Chris@42 81 TZ = FNMS(Tb, T9, TY);
Chris@42 82 Td = FMA(Tb, Tc, Ta);
Chris@42 83 Tk = ii[WS(rs, 1)];
Chris@42 84 Tg = W[0];
Chris@42 85 T1Q = TX + TZ;
Chris@42 86 T10 = TX - TZ;
Chris@42 87 T1W = Td - T7;
Chris@42 88 Te = T7 + Td;
Chris@42 89 Ti = Tg * Th;
Chris@42 90 T1n = Tg * Tk;
Chris@42 91 {
Chris@42 92 E Tt, Tw, Ts, Tv, T1h, Tu, Tm;
/* Points 7 and 4 with W[12], W[13] and W[6], W[7]. */
Chris@42 93 Tt = ri[WS(rs, 7)];
Chris@42 94 Tw = ii[WS(rs, 7)];
Chris@42 95 Ts = W[12];
Chris@42 96 Tv = W[13];
Chris@42 97 Tn = ri[WS(rs, 4)];
Chris@42 98 Tq = ii[WS(rs, 4)];
Chris@42 99 T1h = Ts * Tw;
Chris@42 100 Tu = Ts * Tt;
Chris@42 101 Tm = W[6];
Chris@42 102 Tp = W[7];
Chris@42 103 T1i = FNMS(Tv, Tt, T1h);
Chris@42 104 Tx = FMA(Tv, Tw, Tu);
Chris@42 105 T1j = Tm * Tq;
Chris@42 106 To = Tm * Tn;
Chris@42 107 }
Chris@42 108 Tj = W[1];
Chris@42 109 TB = ri[WS(rs, 2)];
Chris@42 110 {
Chris@42 111 E T1k, Tr, T1o, T1p;
Chris@42 112 T1k = FNMS(Tp, Tn, T1j);
Chris@42 113 Tr = FMA(Tp, Tq, To);
Chris@42 114 T1o = FNMS(Tj, Th, T1n);
Chris@42 115 Tl = FMA(Tj, Tk, Ti);
Chris@42 116 T1p = T1k + T1i;
Chris@42 117 T1l = T1i - T1k;
Chris@42 118 Ty = Tr + Tx;
Chris@42 119 T1r = Tr - Tx;
Chris@42 120 T1q = FNMS(KP500000000, T1p, T1o);
Chris@42 121 T1M = T1o + T1p;
Chris@42 122 TE = ii[WS(rs, 2)];
Chris@42 123 }
Chris@42 124 T1g = FNMS(KP500000000, Ty, Tl);
Chris@42 125 Tz = Tl + Ty;
Chris@42 126 TA = W[2];
Chris@42 127 {
Chris@42 128 E TN, TQ, TP, T16, TO, TM;
/* Points 8 and 5 with W[14], W[15] and W[8], W[9]; point 2 uses W[2], W[3]. */
Chris@42 129 TN = ri[WS(rs, 8)];
Chris@42 130 TQ = ii[WS(rs, 8)];
Chris@42 131 TM = W[14];
Chris@42 132 T12 = TA * TE;
Chris@42 133 TC = TA * TB;
Chris@42 134 TP = W[15];
Chris@42 135 T16 = TM * TQ;
Chris@42 136 TO = TM * TN;
Chris@42 137 TH = ri[WS(rs, 5)];
Chris@42 138 TK = ii[WS(rs, 5)];
Chris@42 139 T17 = FNMS(TP, TN, T16);
Chris@42 140 TR = FMA(TP, TQ, TO);
Chris@42 141 TG = W[8];
Chris@42 142 TJ = W[9];
Chris@42 143 }
Chris@42 144 TD = W[3];
Chris@42 145 }
Chris@42 146 }
Chris@42 147 {
Chris@42 148 E TV, Tf, T1S, T1V, T1d, T1a, T19, T1N, TT, T1c;
Chris@42 149 {
Chris@42 150 E T13, TF, T15, TL, T14, TI, TS, T18;
Chris@42 151 TV = FNMS(KP500000000, Te, T1);
Chris@42 152 Tf = T1 + Te;
Chris@42 153 T14 = TG * TK;
Chris@42 154 TI = TG * TH;
Chris@42 155 T13 = FNMS(TD, TB, T12);
Chris@42 156 TF = FMA(TD, TE, TC);
Chris@42 157 T15 = FNMS(TJ, TH, T14);
Chris@42 158 TL = FMA(TJ, TK, TI);
Chris@42 159 T1S = T1Q + T1R;
Chris@42 160 T1V = FNMS(KP500000000, T1Q, T1R);
Chris@42 161 T18 = T15 + T17;
Chris@42 162 T1d = T15 - T17;
Chris@42 163 TS = TL + TR;
Chris@42 164 T1a = TR - TL;
Chris@42 165 T19 = FNMS(KP500000000, T18, T13);
Chris@42 166 T1N = T13 + T18;
Chris@42 167 TT = TF + TS;
Chris@42 168 T1c = FNMS(KP500000000, TS, TF);
Chris@42 169 }
Chris@42 170 {
Chris@42 171 E T11, T1z, T1E, T1D, T21, T1X, T1I, T1C, T1Y, T1y, T20, T1u, T1U, TU;
Chris@42 172 T1U = TT - Tz;
Chris@42 173 TU = Tz + TT;
Chris@42 174 {
Chris@42 175 E T1P, T1O, T1L, T1T;
/* Outputs 0, 3 and 6 are stored in this block. */
Chris@42 176 T1P = T1M + T1N;
Chris@42 177 T1O = T1M - T1N;
Chris@42 178 T11 = FMA(KP866025403, T10, TV);
Chris@42 179 T1z = FNMS(KP866025403, T10, TV);
Chris@42 180 T1L = FNMS(KP500000000, TU, Tf);
Chris@42 181 ri[0] = Tf + TU;
Chris@42 182 T1T = FNMS(KP500000000, T1P, T1S);
Chris@42 183 ii[0] = T1P + T1S;
Chris@42 184 ri[WS(rs, 3)] = FMA(KP866025403, T1O, T1L);
Chris@42 185 ri[WS(rs, 6)] = FNMS(KP866025403, T1O, T1L);
Chris@42 186 ii[WS(rs, 6)] = FNMS(KP866025403, T1U, T1T);
Chris@42 187 ii[WS(rs, 3)] = FMA(KP866025403, T1U, T1T);
Chris@42 188 }
Chris@42 189 {
Chris@42 190 E T1B, T1m, T1w, T1f, T1s, T1A, T1b, T1e, T1x, T1t;
/* Intermediate values only; nothing is stored to ri/ii in this block. */
Chris@42 191 T1E = FNMS(KP866025403, T1a, T19);
Chris@42 192 T1b = FMA(KP866025403, T1a, T19);
Chris@42 193 T1e = FMA(KP866025403, T1d, T1c);
Chris@42 194 T1D = FNMS(KP866025403, T1d, T1c);
Chris@42 195 T1B = FMA(KP866025403, T1l, T1g);
Chris@42 196 T1m = FNMS(KP866025403, T1l, T1g);
Chris@42 197 T21 = FNMS(KP866025403, T1W, T1V);
Chris@42 198 T1X = FMA(KP866025403, T1W, T1V);
Chris@42 199 T1w = FNMS(KP176326980, T1b, T1e);
Chris@42 200 T1f = FMA(KP176326980, T1e, T1b);
Chris@42 201 T1s = FNMS(KP866025403, T1r, T1q);
Chris@42 202 T1A = FMA(KP866025403, T1r, T1q);
Chris@42 203 T1x = FNMS(KP839099631, T1m, T1s);
Chris@42 204 T1t = FMA(KP839099631, T1s, T1m);
Chris@42 205 T1I = FNMS(KP176326980, T1A, T1B);
Chris@42 206 T1C = FMA(KP176326980, T1B, T1A);
Chris@42 207 T1Y = FNMS(KP777861913, T1x, T1w);
Chris@42 208 T1y = FMA(KP777861913, T1x, T1w);
Chris@42 209 T20 = FNMS(KP777861913, T1t, T1f);
Chris@42 210 T1u = FMA(KP777861913, T1t, T1f);
Chris@42 211 }
Chris@42 212 {
Chris@42 213 E T22, T1G, T1Z, T1F, T1J, T1v;
/* Outputs 1, 2, 4 and 7 are stored here; T1K, T1H, T24 and T23 are kept for outputs 5 and 8. */
Chris@42 214 ii[WS(rs, 1)] = FNMS(KP984807753, T1Y, T1X);
Chris@42 215 T1v = FNMS(KP492403876, T1u, T11);
Chris@42 216 ri[WS(rs, 1)] = FMA(KP984807753, T1u, T11);
Chris@42 217 T1F = FNMS(KP363970234, T1E, T1D);
Chris@42 218 T1J = FMA(KP363970234, T1D, T1E);
Chris@42 219 ri[WS(rs, 4)] = FMA(KP852868531, T1y, T1v);
Chris@42 220 ri[WS(rs, 7)] = FNMS(KP852868531, T1y, T1v);
Chris@42 221 T1K = FNMS(KP954188894, T1J, T1I);
Chris@42 222 T22 = FMA(KP954188894, T1J, T1I);
Chris@42 223 T1G = FNMS(KP954188894, T1F, T1C);
Chris@42 224 T24 = FMA(KP954188894, T1F, T1C);
Chris@42 225 T1Z = FMA(KP492403876, T1Y, T1X);
Chris@42 226 ii[WS(rs, 2)] = FNMS(KP984807753, T22, T21);
Chris@42 227 ri[WS(rs, 2)] = FMA(KP984807753, T1G, T1z);
Chris@42 228 T1H = FNMS(KP492403876, T1G, T1z);
Chris@42 229 ii[WS(rs, 7)] = FNMS(KP852868531, T20, T1Z);
Chris@42 230 ii[WS(rs, 4)] = FMA(KP852868531, T20, T1Z);
Chris@42 231 T23 = FMA(KP492403876, T22, T21);
Chris@42 232 }
Chris@42 233 }
Chris@42 234 }
Chris@42 235 }
/* Outputs 5 and 8, built from the four loop-scope values. */
Chris@42 236 ri[WS(rs, 8)] = FMA(KP852868531, T1K, T1H);
Chris@42 237 ri[WS(rs, 5)] = FNMS(KP852868531, T1K, T1H);
Chris@42 238 ii[WS(rs, 8)] = FMA(KP852868531, T24, T23);
Chris@42 239 ii[WS(rs, 5)] = FNMS(KP852868531, T24, T23);
Chris@42 240 }
Chris@42 241 }
Chris@42 242 }
Chris@42 243
/*
 * Twiddle-instruction table passed to the codelet descriptor (HAVE_FMA build).
 * NOTE(review): TW_FULL and TW_NEXT are defined in the project headers;
 * presumably {TW_FULL, 0, 9} asks for the full twiddle set for size 9 and
 * the TW_NEXT entry ends the list -- confirm against those headers.
 */
Chris@42 244 static const tw_instr twinstr[] = {
Chris@42 245 {TW_FULL, 0, 9},
Chris@42 246 {TW_NEXT, 1, 0}
Chris@42 247 };
Chris@42 248
/*
 * Codelet descriptor for the HAVE_FMA build: 9, the name "t1_9", the
 * twinstr table, &GENUS, and the counts {24, 16, 72, 0}.  The first three
 * counts match the 24 additions, 16 multiplications and 72 fused
 * multiply/adds quoted in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is set by ct_desc, which is declared elsewhere.
 */
Chris@42 249 static const ct_desc desc = { 9, "t1_9", twinstr, &GENUS, {24, 16, 72, 0}, 0, 0, 0 };
Chris@42 250
/*
 * Registration entry point (HAVE_FMA build): passes the t1_9 function and
 * its descriptor to planner p through X(kdft_dit_register).
 */
Chris@42 251 void X(codelet_t1_9) (planner *p) {
Chris@42 252 X(kdft_dit_register) (p, t1_9, &desc);
Chris@42 253 }
Chris@42 254 #else /* HAVE_FMA */
Chris@42 255
Chris@42 256 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include t.h */
Chris@42 257
Chris@42 258 /*
Chris@42 259 * This function contains 96 FP additions, 72 FP multiplications,
Chris@42 260 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
Chris@42 261 * 41 stack variables, 8 constants, and 36 memory accesses
Chris@42 262 */
Chris@42 263 #include "t.h"
Chris@42 264
/*
 * t1_9, non-FMA variant: size-9 twiddle codelet emitted by genfft's
 * gen_twiddle (see the "Generated by" line above, -n 9).
 *
 * For every m in [mb, me) the loop body reads the nine complex points
 * ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..8, combines points 1..8 with the
 * pairs W[2k-2], W[2k-1], and stores nine results back into the same
 * ri/ii slots.  After each iteration ri and ii advance by ms and W
 * advances by 16 values (eight pairs).
 *
 * The body works on three groups of points, {0, 3, 6}, {2, 5, 8} and
 * {1, 4, 7}, and then writes outputs {0, 3, 6}, {1, 4, 7} and {2, 5, 8}.
 *
 * Parameters:
 *   ri, ii  real / imaginary arrays; read and then overwritten
 *   W       twiddle table; the first entry used is W + mb * 16
 *   rs      stride handed to WS() to address the nine points
 *   mb, me  half-open range of the iteration counter m
 *   ms      step added to ri and ii after each iteration
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are assumed to mean
 * a * b + c and c - a * b as defined in the project headers -- confirm.
 * The statement order is the generator's output; do not hand-edit.
 */
Chris@42 265 static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 266 {
/*
 * Numerically: KP866025403 = sqrt(3)/2, KP500000000 = 1/2, and the other
 * six are cos/sin of 10, 20 and 40 degrees (984807753/173648177,
 * 939692620/342020143, 766044443/642787609).
 */
Chris@42 267 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@42 268 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@42 269 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 270 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@42 271 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@42 272 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@42 273 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 274 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 275 {
Chris@42 276 INT m;
/* W is first moved to the entries for iteration mb; each step then advances ri, ii by ms and W by 16. */
Chris@42 277 for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
Chris@42 278 E T1, T1B, TQ, T1G, Tc, TN, T1A, T1H, TL, T1x, T17, T1o, T1c, T1n, Tu;
Chris@42 279 E T1w, TW, T1k, T11, T1l;
/* Group {0, 3, 6}: point 0 is loaded as-is; points 3 and 6 use W[4], W[5] and W[10], W[11]. */
Chris@42 280 {
Chris@42 281 E T6, TO, Tb, TP;
Chris@42 282 T1 = ri[0];
Chris@42 283 T1B = ii[0];
Chris@42 284 {
Chris@42 285 E T3, T5, T2, T4;
Chris@42 286 T3 = ri[WS(rs, 3)];
Chris@42 287 T5 = ii[WS(rs, 3)];
Chris@42 288 T2 = W[4];
Chris@42 289 T4 = W[5];
Chris@42 290 T6 = FMA(T2, T3, T4 * T5);
Chris@42 291 TO = FNMS(T4, T3, T2 * T5);
Chris@42 292 }
Chris@42 293 {
Chris@42 294 E T8, Ta, T7, T9;
Chris@42 295 T8 = ri[WS(rs, 6)];
Chris@42 296 Ta = ii[WS(rs, 6)];
Chris@42 297 T7 = W[10];
Chris@42 298 T9 = W[11];
Chris@42 299 Tb = FMA(T7, T8, T9 * Ta);
Chris@42 300 TP = FNMS(T9, T8, T7 * Ta);
Chris@42 301 }
/* Sums and sqrt(3)/2-scaled differences of points 3 and 6, folded with point 0. */
Chris@42 302 TQ = KP866025403 * (TO - TP);
Chris@42 303 T1G = KP866025403 * (Tb - T6);
Chris@42 304 Tc = T6 + Tb;
Chris@42 305 TN = FNMS(KP500000000, Tc, T1);
Chris@42 306 T1A = TO + TP;
Chris@42 307 T1H = FNMS(KP500000000, T1A, T1B);
Chris@42 308 }
/* Group {2, 5, 8}: W[2], W[3], W[8], W[9] and W[14], W[15]. */
Chris@42 309 {
Chris@42 310 E Tz, T19, TE, T14, TJ, T15, TK, T1a;
Chris@42 311 {
Chris@42 312 E Tw, Ty, Tv, Tx;
Chris@42 313 Tw = ri[WS(rs, 2)];
Chris@42 314 Ty = ii[WS(rs, 2)];
Chris@42 315 Tv = W[2];
Chris@42 316 Tx = W[3];
Chris@42 317 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@42 318 T19 = FNMS(Tx, Tw, Tv * Ty);
Chris@42 319 }
Chris@42 320 {
Chris@42 321 E TB, TD, TA, TC;
Chris@42 322 TB = ri[WS(rs, 5)];
Chris@42 323 TD = ii[WS(rs, 5)];
Chris@42 324 TA = W[8];
Chris@42 325 TC = W[9];
Chris@42 326 TE = FMA(TA, TB, TC * TD);
Chris@42 327 T14 = FNMS(TC, TB, TA * TD);
Chris@42 328 }
Chris@42 329 {
Chris@42 330 E TG, TI, TF, TH;
Chris@42 331 TG = ri[WS(rs, 8)];
Chris@42 332 TI = ii[WS(rs, 8)];
Chris@42 333 TF = W[14];
Chris@42 334 TH = W[15];
Chris@42 335 TJ = FMA(TF, TG, TH * TI);
Chris@42 336 T15 = FNMS(TH, TG, TF * TI);
Chris@42 337 }
Chris@42 338 TK = TE + TJ;
Chris@42 339 T1a = T14 + T15;
Chris@42 340 TL = Tz + TK;
Chris@42 341 T1x = T19 + T1a;
Chris@42 342 {
Chris@42 343 E T13, T16, T18, T1b;
Chris@42 344 T13 = FNMS(KP500000000, TK, Tz);
Chris@42 345 T16 = KP866025403 * (T14 - T15);
Chris@42 346 T17 = T13 + T16;
Chris@42 347 T1o = T13 - T16;
Chris@42 348 T18 = KP866025403 * (TJ - TE);
Chris@42 349 T1b = FNMS(KP500000000, T1a, T19);
Chris@42 350 T1c = T18 + T1b;
Chris@42 351 T1n = T1b - T18;
Chris@42 352 }
Chris@42 353 }
/* Group {1, 4, 7}: W[0], W[1], W[6], W[7] and W[12], W[13]. */
Chris@42 354 {
Chris@42 355 E Ti, TY, Tn, TT, Ts, TU, Tt, TZ;
Chris@42 356 {
Chris@42 357 E Tf, Th, Te, Tg;
Chris@42 358 Tf = ri[WS(rs, 1)];
Chris@42 359 Th = ii[WS(rs, 1)];
Chris@42 360 Te = W[0];
Chris@42 361 Tg = W[1];
Chris@42 362 Ti = FMA(Te, Tf, Tg * Th);
Chris@42 363 TY = FNMS(Tg, Tf, Te * Th);
Chris@42 364 }
Chris@42 365 {
Chris@42 366 E Tk, Tm, Tj, Tl;
Chris@42 367 Tk = ri[WS(rs, 4)];
Chris@42 368 Tm = ii[WS(rs, 4)];
Chris@42 369 Tj = W[6];
Chris@42 370 Tl = W[7];
Chris@42 371 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@42 372 TT = FNMS(Tl, Tk, Tj * Tm);
Chris@42 373 }
Chris@42 374 {
Chris@42 375 E Tp, Tr, To, Tq;
Chris@42 376 Tp = ri[WS(rs, 7)];
Chris@42 377 Tr = ii[WS(rs, 7)];
Chris@42 378 To = W[12];
Chris@42 379 Tq = W[13];
Chris@42 380 Ts = FMA(To, Tp, Tq * Tr);
Chris@42 381 TU = FNMS(Tq, Tp, To * Tr);
Chris@42 382 }
Chris@42 383 Tt = Tn + Ts;
Chris@42 384 TZ = TT + TU;
Chris@42 385 Tu = Ti + Tt;
Chris@42 386 T1w = TY + TZ;
Chris@42 387 {
Chris@42 388 E TS, TV, TX, T10;
Chris@42 389 TS = FNMS(KP500000000, Tt, Ti);
Chris@42 390 TV = KP866025403 * (TT - TU);
Chris@42 391 TW = TS + TV;
Chris@42 392 T1k = TS - TV;
Chris@42 393 TX = KP866025403 * (Ts - Tn);
Chris@42 394 T10 = FNMS(KP500000000, TZ, TY);
Chris@42 395 T11 = TX + T10;
Chris@42 396 T1l = T10 - TX;
Chris@42 397 }
Chris@42 398 }
/* Real parts of outputs 0, 3 and 6. */
Chris@42 399 {
Chris@42 400 E T1y, Td, TM, T1v;
Chris@42 401 T1y = KP866025403 * (T1w - T1x);
Chris@42 402 Td = T1 + Tc;
Chris@42 403 TM = Tu + TL;
Chris@42 404 T1v = FNMS(KP500000000, TM, Td);
Chris@42 405 ri[0] = Td + TM;
Chris@42 406 ri[WS(rs, 3)] = T1v + T1y;
Chris@42 407 ri[WS(rs, 6)] = T1v - T1y;
Chris@42 408 }
/* Imaginary parts of outputs 0, 3 and 6. */
Chris@42 409 {
Chris@42 410 E T1D, T1z, T1C, T1E;
Chris@42 411 T1D = KP866025403 * (TL - Tu);
Chris@42 412 T1z = T1w + T1x;
Chris@42 413 T1C = T1A + T1B;
Chris@42 414 T1E = FNMS(KP500000000, T1z, T1C);
Chris@42 415 ii[0] = T1z + T1C;
Chris@42 416 ii[WS(rs, 6)] = T1E - T1D;
Chris@42 417 ii[WS(rs, 3)] = T1D + T1E;
Chris@42 418 }
/* Outputs 1, 4 and 7. */
Chris@42 419 {
Chris@42 420 E TR, T1I, T1e, T1J, T1i, T1F, T1f, T1K;
Chris@42 421 TR = TN + TQ;
Chris@42 422 T1I = T1G + T1H;
Chris@42 423 {
Chris@42 424 E T12, T1d, T1g, T1h;
Chris@42 425 T12 = FMA(KP766044443, TW, KP642787609 * T11);
Chris@42 426 T1d = FMA(KP173648177, T17, KP984807753 * T1c);
Chris@42 427 T1e = T12 + T1d;
Chris@42 428 T1J = KP866025403 * (T1d - T12);
Chris@42 429 T1g = FNMS(KP642787609, TW, KP766044443 * T11);
Chris@42 430 T1h = FNMS(KP984807753, T17, KP173648177 * T1c);
Chris@42 431 T1i = KP866025403 * (T1g - T1h);
Chris@42 432 T1F = T1g + T1h;
Chris@42 433 }
Chris@42 434 ri[WS(rs, 1)] = TR + T1e;
Chris@42 435 ii[WS(rs, 1)] = T1F + T1I;
Chris@42 436 T1f = FNMS(KP500000000, T1e, TR);
Chris@42 437 ri[WS(rs, 7)] = T1f - T1i;
Chris@42 438 ri[WS(rs, 4)] = T1f + T1i;
Chris@42 439 T1K = FNMS(KP500000000, T1F, T1I);
Chris@42 440 ii[WS(rs, 4)] = T1J + T1K;
Chris@42 441 ii[WS(rs, 7)] = T1K - T1J;
Chris@42 442 }
/* Outputs 2, 5 and 8. */
Chris@42 443 {
Chris@42 444 E T1j, T1M, T1q, T1N, T1u, T1L, T1r, T1O;
Chris@42 445 T1j = TN - TQ;
Chris@42 446 T1M = T1H - T1G;
Chris@42 447 {
Chris@42 448 E T1m, T1p, T1s, T1t;
Chris@42 449 T1m = FMA(KP173648177, T1k, KP984807753 * T1l);
Chris@42 450 T1p = FNMS(KP939692620, T1o, KP342020143 * T1n);
Chris@42 451 T1q = T1m + T1p;
Chris@42 452 T1N = KP866025403 * (T1p - T1m);
Chris@42 453 T1s = FNMS(KP984807753, T1k, KP173648177 * T1l);
Chris@42 454 T1t = FMA(KP342020143, T1o, KP939692620 * T1n);
Chris@42 455 T1u = KP866025403 * (T1s + T1t);
Chris@42 456 T1L = T1s - T1t;
Chris@42 457 }
Chris@42 458 ri[WS(rs, 2)] = T1j + T1q;
Chris@42 459 ii[WS(rs, 2)] = T1L + T1M;
Chris@42 460 T1r = FNMS(KP500000000, T1q, T1j);
Chris@42 461 ri[WS(rs, 8)] = T1r - T1u;
Chris@42 462 ri[WS(rs, 5)] = T1r + T1u;
Chris@42 463 T1O = FNMS(KP500000000, T1L, T1M);
Chris@42 464 ii[WS(rs, 5)] = T1N + T1O;
Chris@42 465 ii[WS(rs, 8)] = T1O - T1N;
Chris@42 466 }
Chris@42 467 }
Chris@42 468 }
Chris@42 469 }
Chris@42 470
/*
 * Twiddle-instruction table passed to the codelet descriptor (non-FMA build).
 * NOTE(review): TW_FULL and TW_NEXT are defined in the project headers;
 * presumably {TW_FULL, 0, 9} asks for the full twiddle set for size 9 and
 * the TW_NEXT entry ends the list -- confirm against those headers.
 */
Chris@42 471 static const tw_instr twinstr[] = {
Chris@42 472 {TW_FULL, 0, 9},
Chris@42 473 {TW_NEXT, 1, 0}
Chris@42 474 };
Chris@42 475
/*
 * Codelet descriptor for the non-FMA build: 9, the name "t1_9", the
 * twinstr table, &GENUS, and the counts {60, 36, 36, 0}.  The first three
 * counts match the 60 additions, 36 multiplications and 36 fused
 * multiply/adds quoted in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is set by ct_desc, which is declared elsewhere.
 */
Chris@42 476 static const ct_desc desc = { 9, "t1_9", twinstr, &GENUS, {60, 36, 36, 0}, 0, 0, 0 };
Chris@42 477
/*
 * Registration entry point (non-FMA build): passes the t1_9 function and
 * its descriptor to planner p through X(kdft_dit_register).
 */
Chris@42 478 void X(codelet_t1_9) (planner *p) {
Chris@42 479 X(kdft_dit_register) (p, t1_9, &desc);
Chris@42 480 }
Chris@42 481 #endif /* HAVE_FMA */