annotate src/fftw-3.3.5/dft/scalar/codelets/t1_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:36:06 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include t.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@42 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@42 33 * 47 stack variables, 2 constants, and 24 memory accesses
Chris@42 34 */
Chris@42 35 #include "t.h"
Chris@42 36
/*
 * t1_6 (HAVE_FMA build): in-place size-6 twiddle codelet.
 *
 * For every m in [mb, me) the loop body loads six complex samples -- real
 * parts from ri[0] and ri[WS(rs, k)], imaginary parts from ii[0] and
 * ii[WS(rs, k)], k = 1..5 -- combines samples 1..5 with the ten values
 * W[0..9] (the pair W[2k-2], W[2k-1] goes with sample k; sample 0 is used
 * as loaded), and stores six results back to the same ri/ii locations.
 *
 * Parameters:
 *   ri, ii  - real / imaginary arrays; read and overwritten.  Both are
 *             advanced by ms after each iteration.
 *   W       - twiddle values, 10 per iteration; offset by mb * 10 before
 *             the first iteration and advanced by 10 after each one.
 *   rs      - stride passed to WS() to locate samples 1..5.
 *   mb, me  - half-open range of loop iterations.
 *   ms      - element offset between consecutive iterations.
 *
 * NOTE(review): the inline comments assume FMA(a, b, c) is a * b + c and
 * FNMS(a, b, c) is c - a * b, which makes each "twiddled sample k" equal to
 * (ri_k + i * ii_k) * (W[2k-2] - i * W[2k-1]).  Both macros, as well as
 * DK, WS and MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm.
 */
Chris@42 37 static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* numerically 1/2 */
Chris@42 41 {
Chris@42 42 INT m;
Chris@42 43 for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
Chris@42 44 E TY, TU, T10, TZ; /* loop-scope temporaries consumed by the final ii stores below */
Chris@42 45 {
Chris@42 46 E T1, TX, TW, T7, Tn, Tq, TJ, TR, TB, Tl, To, TK, Tt, Tw, Ts;
Chris@42 47 E Tp, Tv;
Chris@42 48 T1 = ri[0]; /* sample 0 is used without a twiddle pair */
Chris@42 49 TX = ii[0];
Chris@42 50 {
Chris@42 51 E T3, T6, T2, T5;
Chris@42 52 T3 = ri[WS(rs, 3)];
Chris@42 53 T6 = ii[WS(rs, 3)];
Chris@42 54 T2 = W[4]; /* W[4], W[5]: twiddle pair for sample 3 */
Chris@42 55 T5 = W[5];
Chris@42 56 {
Chris@42 57 E Ta, Td, Tg, TF, Tb, Tj, Tf, Tc, Ti, TV, T4, T9;
Chris@42 58 Ta = ri[WS(rs, 2)];
Chris@42 59 Td = ii[WS(rs, 2)];
Chris@42 60 TV = T2 * T6;
Chris@42 61 T4 = T2 * T3;
Chris@42 62 T9 = W[2]; /* W[2], W[3]: twiddle pair for sample 2 */
Chris@42 63 Tg = ri[WS(rs, 5)];
Chris@42 64 TW = FNMS(T5, T3, TV); /* imaginary part of twiddled sample 3 */
Chris@42 65 T7 = FMA(T5, T6, T4); /* real part of twiddled sample 3 */
Chris@42 66 TF = T9 * Td;
Chris@42 67 Tb = T9 * Ta;
Chris@42 68 Tj = ii[WS(rs, 5)];
Chris@42 69 Tf = W[8]; /* W[8], W[9]: twiddle pair for sample 5 */
Chris@42 70 Tc = W[3];
Chris@42 71 Ti = W[9];
Chris@42 72 {
Chris@42 73 E TG, Te, TI, Tk, TH, Th, Tm;
Chris@42 74 Tn = ri[WS(rs, 4)];
Chris@42 75 TH = Tf * Tj;
Chris@42 76 Th = Tf * Tg;
Chris@42 77 TG = FNMS(Tc, Ta, TF); /* imaginary part of twiddled sample 2 */
Chris@42 78 Te = FMA(Tc, Td, Tb); /* real part of twiddled sample 2 */
Chris@42 79 TI = FNMS(Ti, Tg, TH); /* imaginary part of twiddled sample 5 */
Chris@42 80 Tk = FMA(Ti, Tj, Th); /* real part of twiddled sample 5 */
Chris@42 81 Tq = ii[WS(rs, 4)];
Chris@42 82 Tm = W[6]; /* W[6], W[7]: twiddle pair for sample 4 */
Chris@42 83 TJ = TG - TI;
Chris@42 84 TR = TG + TI;
Chris@42 85 TB = Te + Tk;
Chris@42 86 Tl = Te - Tk;
Chris@42 87 To = Tm * Tn;
Chris@42 88 TK = Tm * Tq;
Chris@42 89 }
Chris@42 90 Tt = ri[WS(rs, 1)];
Chris@42 91 Tw = ii[WS(rs, 1)];
Chris@42 92 Ts = W[0]; /* W[0], W[1]: twiddle pair for sample 1 */
Chris@42 93 Tp = W[7];
Chris@42 94 Tv = W[1];
Chris@42 95 }
Chris@42 96 }
Chris@42 97 {
Chris@42 98 E TA, T8, TL, Tr, TN, Tx, T11, TM, Tu;
Chris@42 99 TA = T1 + T7; /* sum and difference of sample 0 and twiddled sample 3 (real parts) */
Chris@42 100 T8 = T1 - T7;
Chris@42 101 TM = Ts * Tw;
Chris@42 102 Tu = Ts * Tt;
Chris@42 103 TL = FNMS(Tp, Tn, TK); /* imaginary part of twiddled sample 4 */
Chris@42 104 Tr = FMA(Tp, Tq, To); /* real part of twiddled sample 4 */
Chris@42 105 TN = FNMS(Tv, Tt, TM); /* imaginary part of twiddled sample 1 */
Chris@42 106 Tx = FMA(Tv, Tw, Tu); /* real part of twiddled sample 1 */
Chris@42 107 T11 = TX - TW; /* difference and sum of sample 0 and twiddled sample 3 (imaginary parts) */
Chris@42 108 TY = TW + TX;
Chris@42 109 {
Chris@42 110 E TP, TT, TD, TE, TQ, Tz, T14, T13;
Chris@42 111 {
Chris@42 112 E TO, TS, TC, Ty, T12;
Chris@42 113 TO = TL - TN;
Chris@42 114 TS = TL + TN;
Chris@42 115 TC = Tr + Tx;
Chris@42 116 Ty = Tr - Tx;
Chris@42 117 T12 = TJ + TO;
Chris@42 118 TP = TJ - TO;
Chris@42 119 TT = TR - TS;
Chris@42 120 TU = TR + TS;
Chris@42 121 Tz = Tl + Ty;
Chris@42 122 T14 = Ty - Tl;
Chris@42 123 ii[WS(rs, 3)] = T12 + T11; /* first store; every ri/ii/W load of this iteration is above this line */
Chris@42 124 T13 = FNMS(KP500000000, T12, T11);
Chris@42 125 T10 = TC - TB;
Chris@42 126 TD = TB + TC;
Chris@42 127 }
Chris@42 128 ri[WS(rs, 3)] = T8 + Tz;
Chris@42 129 TE = FNMS(KP500000000, Tz, T8);
Chris@42 130 ii[WS(rs, 5)] = FNMS(KP866025403, T14, T13);
Chris@42 131 ii[WS(rs, 1)] = FMA(KP866025403, T14, T13);
Chris@42 132 TQ = FNMS(KP500000000, TD, TA);
Chris@42 133 ri[WS(rs, 5)] = FNMS(KP866025403, TP, TE);
Chris@42 134 ri[WS(rs, 1)] = FMA(KP866025403, TP, TE);
Chris@42 135 ri[0] = TA + TD;
Chris@42 136 ri[WS(rs, 4)] = FMA(KP866025403, TT, TQ);
Chris@42 137 ri[WS(rs, 2)] = FNMS(KP866025403, TT, TQ);
Chris@42 138 }
Chris@42 139 }
Chris@42 140 }
Chris@42 141 ii[0] = TU + TY; /* remaining imaginary outputs 0, 2 and 4, built from TY, TU and T10 */
Chris@42 142 TZ = FNMS(KP500000000, TU, TY);
Chris@42 143 ii[WS(rs, 2)] = FNMS(KP866025403, T10, TZ);
Chris@42 144 ii[WS(rs, 4)] = FMA(KP866025403, T10, TZ);
Chris@42 145 }
Chris@42 146 }
Chris@42 147 }
Chris@42 148
/*
 * Twiddle instruction table; desc points at it.  The meaning of TW_FULL,
 * TW_NEXT and of the numeric fields comes from the tw_instr definition,
 * which is not in this file.
 * NOTE(review): presumably describes the twiddle layout t1_6 consumes
 * (10 values per iteration for size 6) -- confirm against tw_instr.
 */
Chris@42 149 static const tw_instr twinstr[] = {
Chris@42 150 {TW_FULL, 0, 6},
Chris@42 151 {TW_NEXT, 1, 0}
Chris@42 152 };
Chris@42 153
/*
 * Codelet descriptor handed to the planner together with t1_6: size 6, the
 * name "t1_6", the twiddle table above and &GENUS.  {24, 10, 22, 0} matches
 * the operation counts quoted in the generated header comment for this
 * build (24 additions, 10 multiplications, 22 fused multiply/add).
 * NOTE(review): the three trailing zeros are positional ct_desc fields whose
 * meaning is defined outside this file.
 */
Chris@42 154 static const ct_desc desc = { 6, "t1_6", twinstr, &GENUS, {24, 10, 22, 0}, 0, 0, 0 };
Chris@42 155
/*
 * Registration entry point: passes the HAVE_FMA t1_6 kernel and its
 * descriptor to X(kdft_dit_register) for planner p.
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * symbol-name prefix) -- confirm.
 */
Chris@42 156 void X(codelet_t1_6) (planner *p) {
Chris@42 157 X(kdft_dit_register) (p, t1_6, &desc);
Chris@42 158 }
Chris@42 159 #else /* HAVE_FMA */
Chris@42 160
Chris@42 161 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include t.h */
Chris@42 162
Chris@42 163 /*
Chris@42 164 * This function contains 46 FP additions, 28 FP multiplications,
Chris@42 165 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@42 166 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@42 167 */
Chris@42 168 #include "t.h"
Chris@42 169
/*
 * t1_6 (build without HAVE_FMA): in-place size-6 twiddle codelet.
 *
 * For every m in [mb, me) the loop body loads six complex samples -- real
 * parts from ri[0] and ri[WS(rs, k)], imaginary parts from ii[0] and
 * ii[WS(rs, k)], k = 1..5 -- combines samples 1..5 with the ten values
 * W[0..9] (the pair W[2k-2], W[2k-1] goes with sample k; sample 0 is used
 * as loaded), and stores six results back to the same ri/ii locations.
 * All loads happen in the first three inner blocks; the last two blocks
 * only compute and store.
 *
 * Parameters:
 *   ri, ii  - real / imaginary arrays; read and overwritten.  Both are
 *             advanced by ms after each iteration.
 *   W       - twiddle values, 10 per iteration; offset by mb * 10 before
 *             the first iteration and advanced by 10 after each one.
 *   rs      - stride passed to WS() to locate samples 1..5.
 *   mb, me  - half-open range of loop iterations.
 *   ms      - element offset between consecutive iterations.
 *
 * NOTE(review): the inline comments assume FMA(a, b, c) is a * b + c and
 * FNMS(a, b, c) is c - a * b, which makes each "twiddled sample k" equal to
 * (ri_k + i * ii_k) * (W[2k-2] - i * W[2k-1]).  Both macros, as well as
 * DK, WS and MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm.
 */
Chris@42 170 static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 171 {
Chris@42 172 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* numerically 1/2 */
Chris@42 173 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 174 {
Chris@42 175 INT m;
Chris@42 176 for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
Chris@42 177 E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC; /* pairwise sums/differences shared by the two output blocks */
Chris@42 178 { /* samples 0 and 3 */
Chris@42 179 E T1, TN, T6, TM;
Chris@42 180 T1 = ri[0]; /* sample 0 is used without a twiddle pair */
Chris@42 181 TN = ii[0];
Chris@42 182 {
Chris@42 183 E T3, T5, T2, T4;
Chris@42 184 T3 = ri[WS(rs, 3)];
Chris@42 185 T5 = ii[WS(rs, 3)];
Chris@42 186 T2 = W[4];
Chris@42 187 T4 = W[5];
Chris@42 188 T6 = FMA(T2, T3, T4 * T5); /* real part of twiddled sample 3 */
Chris@42 189 TM = FNMS(T4, T3, T2 * T5); /* imaginary part of twiddled sample 3 */
Chris@42 190 }
Chris@42 191 T7 = T1 - T6;
Chris@42 192 TS = TN - TM;
Chris@42 193 Tv = T1 + T6;
Chris@42 194 TO = TM + TN;
Chris@42 195 }
Chris@42 196 { /* samples 4 and 1 */
Chris@42 197 E Tn, TD, Ts, TE;
Chris@42 198 {
Chris@42 199 E Tk, Tm, Tj, Tl;
Chris@42 200 Tk = ri[WS(rs, 4)];
Chris@42 201 Tm = ii[WS(rs, 4)];
Chris@42 202 Tj = W[6];
Chris@42 203 Tl = W[7];
Chris@42 204 Tn = FMA(Tj, Tk, Tl * Tm); /* real part of twiddled sample 4 */
Chris@42 205 TD = FNMS(Tl, Tk, Tj * Tm); /* imaginary part of twiddled sample 4 */
Chris@42 206 }
Chris@42 207 {
Chris@42 208 E Tp, Tr, To, Tq;
Chris@42 209 Tp = ri[WS(rs, 1)];
Chris@42 210 Tr = ii[WS(rs, 1)];
Chris@42 211 To = W[0];
Chris@42 212 Tq = W[1];
Chris@42 213 Ts = FMA(To, Tp, Tq * Tr); /* real part of twiddled sample 1 */
Chris@42 214 TE = FNMS(Tq, Tp, To * Tr); /* imaginary part of twiddled sample 1 */
Chris@42 215 }
Chris@42 216 Tt = Tn - Ts;
Chris@42 217 TJ = TD + TE;
Chris@42 218 Tx = Tn + Ts;
Chris@42 219 TF = TD - TE;
Chris@42 220 }
Chris@42 221 { /* samples 2 and 5 */
Chris@42 222 E Tc, TA, Th, TB;
Chris@42 223 {
Chris@42 224 E T9, Tb, T8, Ta;
Chris@42 225 T9 = ri[WS(rs, 2)];
Chris@42 226 Tb = ii[WS(rs, 2)];
Chris@42 227 T8 = W[2];
Chris@42 228 Ta = W[3];
Chris@42 229 Tc = FMA(T8, T9, Ta * Tb); /* real part of twiddled sample 2 */
Chris@42 230 TA = FNMS(Ta, T9, T8 * Tb); /* imaginary part of twiddled sample 2 */
Chris@42 231 }
Chris@42 232 {
Chris@42 233 E Te, Tg, Td, Tf;
Chris@42 234 Te = ri[WS(rs, 5)];
Chris@42 235 Tg = ii[WS(rs, 5)];
Chris@42 236 Td = W[8];
Chris@42 237 Tf = W[9];
Chris@42 238 Th = FMA(Td, Te, Tf * Tg); /* real part of twiddled sample 5 */
Chris@42 239 TB = FNMS(Tf, Te, Td * Tg); /* imaginary part of twiddled sample 5 */
Chris@42 240 }
Chris@42 241 Ti = Tc - Th;
Chris@42 242 TI = TA + TB;
Chris@42 243 Tw = Tc + Th;
Chris@42 244 TC = TA - TB;
Chris@42 245 }
Chris@42 246 { /* outputs 1, 3 and 5, built from the pairwise differences */
Chris@42 247 E TG, Tu, Tz, TR, TT, TU;
Chris@42 248 TG = KP866025403 * (TC - TF);
Chris@42 249 Tu = Ti + Tt;
Chris@42 250 Tz = FNMS(KP500000000, Tu, T7);
Chris@42 251 ri[WS(rs, 3)] = T7 + Tu;
Chris@42 252 ri[WS(rs, 1)] = Tz + TG;
Chris@42 253 ri[WS(rs, 5)] = Tz - TG;
Chris@42 254 TR = KP866025403 * (Tt - Ti);
Chris@42 255 TT = TC + TF;
Chris@42 256 TU = FNMS(KP500000000, TT, TS);
Chris@42 257 ii[WS(rs, 1)] = TR + TU;
Chris@42 258 ii[WS(rs, 3)] = TT + TS;
Chris@42 259 ii[WS(rs, 5)] = TU - TR;
Chris@42 260 }
Chris@42 261 { /* outputs 0, 2 and 4, built from the pairwise sums */
Chris@42 262 E TK, Ty, TH, TQ, TL, TP;
Chris@42 263 TK = KP866025403 * (TI - TJ);
Chris@42 264 Ty = Tw + Tx;
Chris@42 265 TH = FNMS(KP500000000, Ty, Tv);
Chris@42 266 ri[0] = Tv + Ty;
Chris@42 267 ri[WS(rs, 4)] = TH + TK;
Chris@42 268 ri[WS(rs, 2)] = TH - TK;
Chris@42 269 TQ = KP866025403 * (Tx - Tw);
Chris@42 270 TL = TI + TJ;
Chris@42 271 TP = FNMS(KP500000000, TL, TO);
Chris@42 272 ii[0] = TL + TO;
Chris@42 273 ii[WS(rs, 4)] = TQ + TP;
Chris@42 274 ii[WS(rs, 2)] = TP - TQ;
Chris@42 275 }
Chris@42 276 }
Chris@42 277 }
Chris@42 278 }
Chris@42 279
/*
 * Twiddle instruction table; desc points at it.  The meaning of TW_FULL,
 * TW_NEXT and of the numeric fields comes from the tw_instr definition,
 * which is not in this file.
 * NOTE(review): presumably describes the twiddle layout t1_6 consumes
 * (10 values per iteration for size 6) -- confirm against tw_instr.
 */
Chris@42 280 static const tw_instr twinstr[] = {
Chris@42 281 {TW_FULL, 0, 6},
Chris@42 282 {TW_NEXT, 1, 0}
Chris@42 283 };
Chris@42 284
/*
 * Codelet descriptor handed to the planner together with t1_6: size 6, the
 * name "t1_6", the twiddle table above and &GENUS.  {32, 14, 14, 0} matches
 * the operation counts quoted in the generated header comment for this
 * build (32 additions, 14 multiplications, 14 fused multiply/add).
 * NOTE(review): the three trailing zeros are positional ct_desc fields whose
 * meaning is defined outside this file.
 */
Chris@42 285 static const ct_desc desc = { 6, "t1_6", twinstr, &GENUS, {32, 14, 14, 0}, 0, 0, 0 };
Chris@42 286
/*
 * Registration entry point: passes the non-FMA t1_6 kernel and its
 * descriptor to X(kdft_dit_register) for planner p.
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * symbol-name prefix) -- confirm.
 */
Chris@42 287 void X(codelet_t1_6) (planner *p) {
Chris@42 288 X(kdft_dit_register) (p, t1_6, &desc);
Chris@42 289 }
Chris@42 290 #endif /* HAVE_FMA */