annotate src/fftw-3.3.8/dft/scalar/codelets/t1_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:13 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include dft/scalar/t.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@82 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@82 33 * 31 stack variables, 2 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/t.h"
Chris@82 36
/*
 * t1_6 (variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined): in-place size-6 twiddle codelet, generated with "-n 6" and
 * registered further down through X(kdft_dit_register).
 *
 * ri, ii  - real and imaginary arrays; element k is addressed as
 *           ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..5.  All six elements
 *           are read and then overwritten.
 * W       - table of multipliers; advanced by mb * 10 before the first
 *           iteration and by 10 after each one, W[0]..W[9] being read in
 *           every iteration.
 * rs      - stride handed to WS() to locate the six elements.
 * mb, me  - the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms      - amount added to ri and ii after every iteration.
 *
 * NOTE(review): FMA, FNMS, DK, WS, E, R and MAKE_VOLATILE_STRIDE come from
 * headers outside this file.  The comments below assume FMA(a, b, c) is
 * a * b + c and FNMS(a, b, c) is c - a * b -- confirm against those headers.
 */
Chris@82 37 static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* KP866025403 is sqrt(3)/2 and KP500000000 is 1/2. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
Chris@82 43 for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
/* Values that outlive the nested scopes below and feed the output stages. */
Chris@82 44 E T1, TX, T7, TW, Tl, TR, TB, TJ, Ty, TS, TC, TO;
/* Element 0 is loaded unchanged; no W entry is applied to it. */
Chris@82 45 T1 = ri[0];
Chris@82 46 TX = ii[0];
Chris@82 47 {
/*
 * Element 3 combined with the pair (W[4], W[5]):
 *   T7 = W[4] * re + W[5] * im,  TW = W[4] * im - W[5] * re
 * (under the FMA/FNMS assumption stated in the header comment).
 */
Chris@82 48 E T3, T6, T4, TV, T2, T5;
Chris@82 49 T3 = ri[WS(rs, 3)];
Chris@82 50 T6 = ii[WS(rs, 3)];
Chris@82 51 T2 = W[4];
Chris@82 52 T4 = T2 * T3;
Chris@82 53 TV = T2 * T6;
Chris@82 54 T5 = W[5];
Chris@82 55 T7 = FMA(T5, T6, T4);
Chris@82 56 TW = FNMS(T5, T3, TV);
Chris@82 57 }
Chris@82 58 {
/*
 * Elements 2 and 5, combined with (W[2], W[3]) and (W[8], W[9]) in the
 * same pattern as element 3 above.
 */
Chris@82 59 E Ta, Td, Tb, TF, Tg, Tj, Th, TH, T9, Tf;
Chris@82 60 Ta = ri[WS(rs, 2)];
Chris@82 61 Td = ii[WS(rs, 2)];
Chris@82 62 T9 = W[2];
Chris@82 63 Tb = T9 * Ta;
Chris@82 64 TF = T9 * Td;
Chris@82 65 Tg = ri[WS(rs, 5)];
Chris@82 66 Tj = ii[WS(rs, 5)];
Chris@82 67 Tf = W[8];
Chris@82 68 Th = Tf * Tg;
Chris@82 69 TH = Tf * Tj;
Chris@82 70 {
Chris@82 71 E Te, TG, Tk, TI, Tc, Ti;
Chris@82 72 Tc = W[3];
Chris@82 73 Te = FMA(Tc, Td, Tb);
Chris@82 74 TG = FNMS(Tc, Ta, TF);
Chris@82 75 Ti = W[9];
Chris@82 76 Tk = FMA(Ti, Tj, Th);
Chris@82 77 TI = FNMS(Ti, Tg, TH);
/* (Te, TG) is the scaled element 2 and (Tk, TI) the scaled element 5; keep
 * their sums (TB, TR) and differences (Tl, TJ). */
Chris@82 78 Tl = Te - Tk;
Chris@82 79 TR = TG + TI;
Chris@82 80 TB = Te + Tk;
Chris@82 81 TJ = TG - TI;
Chris@82 82 }
Chris@82 83 }
Chris@82 84 {
/*
 * Elements 4 and 1, combined with (W[6], W[7]) and (W[0], W[1]) in the
 * same pattern.
 */
Chris@82 85 E Tn, Tq, To, TK, Tt, Tw, Tu, TM, Tm, Ts;
Chris@82 86 Tn = ri[WS(rs, 4)];
Chris@82 87 Tq = ii[WS(rs, 4)];
Chris@82 88 Tm = W[6];
Chris@82 89 To = Tm * Tn;
Chris@82 90 TK = Tm * Tq;
Chris@82 91 Tt = ri[WS(rs, 1)];
Chris@82 92 Tw = ii[WS(rs, 1)];
Chris@82 93 Ts = W[0];
Chris@82 94 Tu = Ts * Tt;
Chris@82 95 TM = Ts * Tw;
Chris@82 96 {
Chris@82 97 E Tr, TL, Tx, TN, Tp, Tv;
Chris@82 98 Tp = W[7];
Chris@82 99 Tr = FMA(Tp, Tq, To);
Chris@82 100 TL = FNMS(Tp, Tn, TK);
Chris@82 101 Tv = W[1];
Chris@82 102 Tx = FMA(Tv, Tw, Tu);
Chris@82 103 TN = FNMS(Tv, Tt, TM);
/* (Tr, TL) is the scaled element 4 and (Tx, TN) the scaled element 1; keep
 * their sums (TC, TS) and differences (Ty, TO). */
Chris@82 104 Ty = Tr - Tx;
Chris@82 105 TS = TL + TN;
Chris@82 106 TC = Tr + Tx;
Chris@82 107 TO = TL - TN;
Chris@82 108 }
Chris@82 109 }
Chris@82 110 {
/* Real parts of outputs 3, 1 and 5, built from the difference terms. */
Chris@82 111 E TP, T8, Tz, TE;
Chris@82 112 TP = TJ - TO;
Chris@82 113 T8 = T1 - T7;
Chris@82 114 Tz = Tl + Ty;
Chris@82 115 TE = FNMS(KP500000000, Tz, T8);
Chris@82 116 ri[WS(rs, 3)] = T8 + Tz;
Chris@82 117 ri[WS(rs, 1)] = FMA(KP866025403, TP, TE);
Chris@82 118 ri[WS(rs, 5)] = FNMS(KP866025403, TP, TE);
Chris@82 119 }
Chris@82 120 {
/* Imaginary parts of outputs 1, 3 and 5. */
Chris@82 121 E T14, T11, T12, T13;
Chris@82 122 T14 = Ty - Tl;
Chris@82 123 T11 = TX - TW;
Chris@82 124 T12 = TJ + TO;
Chris@82 125 T13 = FNMS(KP500000000, T12, T11);
Chris@82 126 ii[WS(rs, 1)] = FMA(KP866025403, T14, T13);
Chris@82 127 ii[WS(rs, 3)] = T12 + T11;
Chris@82 128 ii[WS(rs, 5)] = FNMS(KP866025403, T14, T13);
Chris@82 129 }
Chris@82 130 {
/* Real parts of outputs 0, 4 and 2, built from the sum terms. */
Chris@82 131 E TT, TA, TD, TQ;
Chris@82 132 TT = TR - TS;
Chris@82 133 TA = T1 + T7;
Chris@82 134 TD = TB + TC;
Chris@82 135 TQ = FNMS(KP500000000, TD, TA);
Chris@82 136 ri[0] = TA + TD;
Chris@82 137 ri[WS(rs, 4)] = FMA(KP866025403, TT, TQ);
Chris@82 138 ri[WS(rs, 2)] = FNMS(KP866025403, TT, TQ);
Chris@82 139 }
Chris@82 140 {
/* Imaginary parts of outputs 0, 4 and 2. */
Chris@82 141 E T10, TU, TY, TZ;
Chris@82 142 T10 = TC - TB;
Chris@82 143 TU = TR + TS;
Chris@82 144 TY = TW + TX;
Chris@82 145 TZ = FNMS(KP500000000, TU, TY);
Chris@82 146 ii[0] = TU + TY;
Chris@82 147 ii[WS(rs, 4)] = FMA(KP866025403, T10, TZ);
Chris@82 148 ii[WS(rs, 2)] = FNMS(KP866025403, T10, TZ);
Chris@82 149 }
Chris@82 150 }
Chris@82 151 }
Chris@82 152 }
Chris@82 153
/*
 * Twiddle instruction list referenced by desc in this branch: one TW_FULL
 * entry with arguments 0 and 6, followed by a TW_NEXT entry with arguments
 * 1 and 0.  The tw_instr type and the TW_* codes are declared outside this
 * file.
 */
Chris@82 154 static const tw_instr twinstr[] = {
Chris@82 155 {TW_FULL, 0, 6},
Chris@82 156 {TW_NEXT, 1, 0}
Chris@82 157 };
Chris@82 158
/*
 * Descriptor handed to the registration call below: size 6, name "t1_6",
 * the twinstr list and &GENUS.  The {24, 10, 22, 0} group matches the
 * "24 additions, 10 multiplications, 22 fused multiply/add" figures quoted
 * in the generated comment above this variant of t1_6.
 * NOTE(review): the meaning of the three trailing zeros depends on ct_desc,
 * which is declared outside this file.
 */
Chris@82 159 static const ct_desc desc = { 6, "t1_6", twinstr, &GENUS, {24, 10, 22, 0}, 0, 0, 0 };
Chris@82 160
/*
 * Registration entry point for this codelet: passes the planner p, the
 * t1_6 kernel and its descriptor to X(kdft_dit_register).  X() is a
 * name-wrapping macro defined outside this file.
 */
Chris@82 161 void X(codelet_t1_6) (planner *p) {
Chris@82 162 X(kdft_dit_register) (p, t1_6, &desc);
Chris@82 163 }
Chris@82 164 #else
Chris@82 165
Chris@82 166 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include dft/scalar/t.h */
Chris@82 167
Chris@82 168 /*
Chris@82 169 * This function contains 46 FP additions, 28 FP multiplications,
Chris@82 170 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@82 171 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@82 172 */
Chris@82 173 #include "dft/scalar/t.h"
Chris@82 174
/*
 * t1_6 (variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): in-place size-6 twiddle codelet,
 * generated with "-n 6" and registered further down through
 * X(kdft_dit_register).
 *
 * ri, ii  - real and imaginary arrays; element k is addressed as
 *           ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..5.  All six elements
 *           are read and then overwritten.
 * W       - table of multipliers; advanced by mb * 10 before the first
 *           iteration and by 10 after each one, W[0]..W[9] being read in
 *           every iteration.
 * rs      - stride handed to WS() to locate the six elements.
 * mb, me  - the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms      - amount added to ri and ii after every iteration.
 *
 * NOTE(review): FMA, FNMS, DK, WS, E, R and MAKE_VOLATILE_STRIDE come from
 * headers outside this file.  The comments below assume FMA(a, b, c) is
 * a * b + c and FNMS(a, b, c) is c - a * b -- confirm against those headers.
 */
Chris@82 175 static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 176 {
/* KP500000000 is 1/2 and KP866025403 is sqrt(3)/2. */
Chris@82 177 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 178 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 179 {
Chris@82 180 INT m;
Chris@82 181 for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
/* Values that outlive the nested scopes below and feed the output stages. */
Chris@82 182 E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC;
Chris@82 183 {
/*
 * Element 0 is taken unchanged; element 3 is combined with (W[4], W[5]):
 *   T6 = W[4] * re + W[5] * im,  TM = W[4] * im - W[5] * re
 * (under the FMA/FNMS assumption stated in the header comment).
 * The differences (T7, TS) and sums (Tv, TO) of the two are kept.
 */
Chris@82 184 E T1, TN, T6, TM;
Chris@82 185 T1 = ri[0];
Chris@82 186 TN = ii[0];
Chris@82 187 {
Chris@82 188 E T3, T5, T2, T4;
Chris@82 189 T3 = ri[WS(rs, 3)];
Chris@82 190 T5 = ii[WS(rs, 3)];
Chris@82 191 T2 = W[4];
Chris@82 192 T4 = W[5];
Chris@82 193 T6 = FMA(T2, T3, T4 * T5);
Chris@82 194 TM = FNMS(T4, T3, T2 * T5);
Chris@82 195 }
Chris@82 196 T7 = T1 - T6;
Chris@82 197 TS = TN - TM;
Chris@82 198 Tv = T1 + T6;
Chris@82 199 TO = TM + TN;
Chris@82 200 }
Chris@82 201 {
/*
 * Elements 4 and 1, combined with (W[6], W[7]) and (W[0], W[1]) in the
 * same pattern; their sums (Tx, TJ) and differences (Tt, TF) are kept.
 */
Chris@82 202 E Tn, TD, Ts, TE;
Chris@82 203 {
Chris@82 204 E Tk, Tm, Tj, Tl;
Chris@82 205 Tk = ri[WS(rs, 4)];
Chris@82 206 Tm = ii[WS(rs, 4)];
Chris@82 207 Tj = W[6];
Chris@82 208 Tl = W[7];
Chris@82 209 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 210 TD = FNMS(Tl, Tk, Tj * Tm);
Chris@82 211 }
Chris@82 212 {
Chris@82 213 E Tp, Tr, To, Tq;
Chris@82 214 Tp = ri[WS(rs, 1)];
Chris@82 215 Tr = ii[WS(rs, 1)];
Chris@82 216 To = W[0];
Chris@82 217 Tq = W[1];
Chris@82 218 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 219 TE = FNMS(Tq, Tp, To * Tr);
Chris@82 220 }
Chris@82 221 Tt = Tn - Ts;
Chris@82 222 TJ = TD + TE;
Chris@82 223 Tx = Tn + Ts;
Chris@82 224 TF = TD - TE;
Chris@82 225 }
Chris@82 226 {
/*
 * Elements 2 and 5, combined with (W[2], W[3]) and (W[8], W[9]) in the
 * same pattern; their sums (Tw, TI) and differences (Ti, TC) are kept.
 */
Chris@82 227 E Tc, TA, Th, TB;
Chris@82 228 {
Chris@82 229 E T9, Tb, T8, Ta;
Chris@82 230 T9 = ri[WS(rs, 2)];
Chris@82 231 Tb = ii[WS(rs, 2)];
Chris@82 232 T8 = W[2];
Chris@82 233 Ta = W[3];
Chris@82 234 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 235 TA = FNMS(Ta, T9, T8 * Tb);
Chris@82 236 }
Chris@82 237 {
Chris@82 238 E Te, Tg, Td, Tf;
Chris@82 239 Te = ri[WS(rs, 5)];
Chris@82 240 Tg = ii[WS(rs, 5)];
Chris@82 241 Td = W[8];
Chris@82 242 Tf = W[9];
Chris@82 243 Th = FMA(Td, Te, Tf * Tg);
Chris@82 244 TB = FNMS(Tf, Te, Td * Tg);
Chris@82 245 }
Chris@82 246 Ti = Tc - Th;
Chris@82 247 TI = TA + TB;
Chris@82 248 Tw = Tc + Th;
Chris@82 249 TC = TA - TB;
Chris@82 250 }
Chris@82 251 {
/* Outputs 3, 1 and 5 (real parts first, then imaginary parts), built from
 * the difference terms. */
Chris@82 252 E TG, Tu, Tz, TR, TT, TU;
Chris@82 253 TG = KP866025403 * (TC - TF);
Chris@82 254 Tu = Ti + Tt;
Chris@82 255 Tz = FNMS(KP500000000, Tu, T7);
Chris@82 256 ri[WS(rs, 3)] = T7 + Tu;
Chris@82 257 ri[WS(rs, 1)] = Tz + TG;
Chris@82 258 ri[WS(rs, 5)] = Tz - TG;
Chris@82 259 TR = KP866025403 * (Tt - Ti);
Chris@82 260 TT = TC + TF;
Chris@82 261 TU = FNMS(KP500000000, TT, TS);
Chris@82 262 ii[WS(rs, 1)] = TR + TU;
Chris@82 263 ii[WS(rs, 3)] = TT + TS;
Chris@82 264 ii[WS(rs, 5)] = TU - TR;
Chris@82 265 }
Chris@82 266 {
/* Outputs 0, 4 and 2 (real parts first, then imaginary parts), built from
 * the sum terms. */
Chris@82 267 E TK, Ty, TH, TQ, TL, TP;
Chris@82 268 TK = KP866025403 * (TI - TJ);
Chris@82 269 Ty = Tw + Tx;
Chris@82 270 TH = FNMS(KP500000000, Ty, Tv);
Chris@82 271 ri[0] = Tv + Ty;
Chris@82 272 ri[WS(rs, 4)] = TH + TK;
Chris@82 273 ri[WS(rs, 2)] = TH - TK;
Chris@82 274 TQ = KP866025403 * (Tx - Tw);
Chris@82 275 TL = TI + TJ;
Chris@82 276 TP = FNMS(KP500000000, TL, TO);
Chris@82 277 ii[0] = TL + TO;
Chris@82 278 ii[WS(rs, 4)] = TQ + TP;
Chris@82 279 ii[WS(rs, 2)] = TP - TQ;
Chris@82 280 }
Chris@82 281 }
Chris@82 282 }
Chris@82 283 }
Chris@82 284
/*
 * Twiddle instruction list referenced by desc in this branch: one TW_FULL
 * entry with arguments 0 and 6, followed by a TW_NEXT entry with arguments
 * 1 and 0.  The tw_instr type and the TW_* codes are declared outside this
 * file.
 */
Chris@82 285 static const tw_instr twinstr[] = {
Chris@82 286 {TW_FULL, 0, 6},
Chris@82 287 {TW_NEXT, 1, 0}
Chris@82 288 };
Chris@82 289
/*
 * Descriptor handed to the registration call below: size 6, name "t1_6",
 * the twinstr list and &GENUS.  The {32, 14, 14, 0} group matches the
 * "32 additions, 14 multiplications, 14 fused multiply/add" figures quoted
 * in the generated comment above this variant of t1_6.
 * NOTE(review): the meaning of the three trailing zeros depends on ct_desc,
 * which is declared outside this file.
 */
Chris@82 290 static const ct_desc desc = { 6, "t1_6", twinstr, &GENUS, {32, 14, 14, 0}, 0, 0, 0 };
Chris@82 291
/*
 * Registration entry point for this codelet: passes the planner p, the
 * t1_6 kernel and its descriptor to X(kdft_dit_register).  X() is a
 * name-wrapping macro defined outside this file.
 */
Chris@82 292 void X(codelet_t1_6) (planner *p) {
Chris@82 293 X(kdft_dit_register) (p, t1_6, &desc);
Chris@82 294 }
Chris@82 295 #endif