annotate src/fftw-3.3.3/dft/scalar/codelets/t2_5.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:36:09 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_twiddle.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -name t2_5 -include t.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 44 FP additions, 40 FP multiplications,
Chris@10 32 * (or, 14 additions, 10 multiplications, 30 fused multiply/add),
Chris@10 33 * 47 stack variables, 4 constants, and 20 memory accesses
Chris@10 34 */
Chris@10 35 #include "t.h"
Chris@10 36
/*
 * t2_5 (HAVE_FMA variant): generated size-5 twiddle kernel that works in place.
 *
 * Each loop iteration reads five strided values from ri and five from ii
 * (offsets 0 and WS(rs, 1..4)), combines inputs 1..4 with multipliers built
 * from W[0..3], and writes ten results back to the same ri/ii locations.
 *
 * Parameters:
 *   ri, ii - arrays that are read and then overwritten; presumably the real
 *            and imaginary parts of the data (TODO confirm against caller).
 *   W      - multiplier table; four values are consumed per iteration and the
 *            pointer starts at W + mb * 4.
 *   rs     - stride handed to WS() to compute the element offsets.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - amount added to both ri and ii after every iteration.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are defined outside this file;
 * presumably they evaluate a * b + c and c - a * b respectively - confirm in
 * the project headers. DK presumably declares a named floating-point constant.
 * The statement order comes from the generator's scheduler; do not reorder.
 */
Chris@10 37 static void t2_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 38 {
/* Numerically: KP951056516 ~ sin(2*pi/5), KP559016994 ~ sqrt(5)/4,
 * KP618033988 ~ (sqrt(5) - 1)/2. */
Chris@10 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@10 43 {
Chris@10 44 INT m;
Chris@10 45 for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@10 46 E Ta, T1, TO, Tp, TS, Ti, TL, TC, To, TE, Ts, TF, T2, T8, T5;
Chris@10 47 E TT, Tt, TG;
/* Load the four stored multiplier values for this iteration. */
Chris@10 48 T2 = W[0];
Chris@10 49 Ta = W[3];
Chris@10 50 T8 = W[2];
Chris@10 51 T5 = W[1];
Chris@10 52 {
Chris@10 53 E Tq, Tr, Te, T9;
Chris@10 54 T1 = ri[0];
Chris@10 55 Te = T2 * Ta;
Chris@10 56 T9 = T2 * T8;
Chris@10 57 TO = ii[0];
Chris@10 58 {
Chris@10 59 E T3, Tf, Tm, Tj, Tb, T4, T6, Tc, Tg;
Chris@10 60 T3 = ri[WS(rs, 1)];
/* Tf, Tm, Tj, Tb are built from products of the stored W values:
 * (Tb, Tf) multiply input 4 and (Tj, Tm) multiply input 2 below. */
Chris@10 61 Tf = FMA(T5, T8, Te);
Chris@10 62 Tm = FNMS(T5, T8, Te);
Chris@10 63 Tj = FMA(T5, Ta, T9);
Chris@10 64 Tb = FNMS(T5, Ta, T9);
Chris@10 65 T4 = T2 * T3;
Chris@10 66 T6 = ii[WS(rs, 1)];
Chris@10 67 Tc = ri[WS(rs, 4)];
Chris@10 68 Tg = ii[WS(rs, 4)];
Chris@10 69 {
Chris@10 70 E Tk, Tl, Tn, TD;
Chris@10 71 {
Chris@10 72 E T7, Tz, Th, TB, Ty, Td, TA;
Chris@10 73 Tk = ri[WS(rs, 2)];
/* (T7, Tz): input 1 combined with (T2, T5);
 * (Th, TB): input 4 combined with (Tb, Tf). */
Chris@10 74 T7 = FMA(T5, T6, T4);
Chris@10 75 Ty = T2 * T6;
Chris@10 76 Td = Tb * Tc;
Chris@10 77 TA = Tb * Tg;
Chris@10 78 Tl = Tj * Tk;
Chris@10 79 Tz = FNMS(T5, T3, Ty);
Chris@10 80 Th = FMA(Tf, Tg, Td);
Chris@10 81 TB = FNMS(Tf, Tc, TA);
Chris@10 82 Tn = ii[WS(rs, 2)];
Chris@10 83 Tp = ri[WS(rs, 3)];
/* Sums and differences of the multiplied inputs 1 and 4. */
Chris@10 84 TS = T7 - Th;
Chris@10 85 Ti = T7 + Th;
Chris@10 86 TL = Tz + TB;
Chris@10 87 TC = Tz - TB;
Chris@10 88 TD = Tj * Tn;
Chris@10 89 Tq = T8 * Tp;
Chris@10 90 Tr = ii[WS(rs, 3)];
Chris@10 91 }
/* (To, TE): input 2 combined with (Tj, Tm). */
Chris@10 92 To = FMA(Tm, Tn, Tl);
Chris@10 93 TE = FNMS(Tm, Tk, TD);
Chris@10 94 }
Chris@10 95 }
/* (Ts, TG): input 3 combined with the stored pair (T8, Ta). */
Chris@10 96 Ts = FMA(Ta, Tr, Tq);
Chris@10 97 TF = T8 * Tr;
Chris@10 98 }
/* Sums and differences of the multiplied inputs 2 and 3. */
Chris@10 99 TT = To - Ts;
Chris@10 100 Tt = To + Ts;
Chris@10 101 TG = FNMS(Ta, Tp, TF);
Chris@10 102 {
Chris@10 103 E TU, TW, TV, TR, Tw, Tu;
Chris@10 104 TU = FMA(KP618033988, TT, TS);
Chris@10 105 TW = FNMS(KP618033988, TS, TT);
Chris@10 106 Tw = Ti - Tt;
Chris@10 107 Tu = Ti + Tt;
Chris@10 108 {
Chris@10 109 E TM, TH, Tv, TI, TK;
Chris@10 110 TM = TE + TG;
Chris@10 111 TH = TE - TG;
/* ri output 0: input 0 plus the sum of the four multiplied ri-side terms. */
Chris@10 112 ri[0] = T1 + Tu;
Chris@10 113 Tv = FNMS(KP250000000, Tu, T1);
Chris@10 114 TI = FMA(KP618033988, TH, TC);
Chris@10 115 TK = FNMS(KP618033988, TC, TH);
Chris@10 116 {
Chris@10 117 E TQ, TN, TJ, Tx, TP;
Chris@10 118 TQ = TL - TM;
Chris@10 119 TN = TL + TM;
Chris@10 120 TJ = FNMS(KP559016994, Tw, Tv);
Chris@10 121 Tx = FMA(KP559016994, Tw, Tv);
/* ii output 0: ii[0] plus the sum of the four multiplied ii-side terms. */
Chris@10 122 ii[0] = TN + TO;
Chris@10 123 TP = FNMS(KP250000000, TN, TO);
/* Remaining ri outputs, written over the inputs in pairs (1, 4) and (3, 2). */
Chris@10 124 ri[WS(rs, 1)] = FMA(KP951056516, TI, Tx);
Chris@10 125 ri[WS(rs, 4)] = FNMS(KP951056516, TI, Tx);
Chris@10 126 ri[WS(rs, 3)] = FMA(KP951056516, TK, TJ);
Chris@10 127 ri[WS(rs, 2)] = FNMS(KP951056516, TK, TJ);
Chris@10 128 TV = FNMS(KP559016994, TQ, TP);
Chris@10 129 TR = FMA(KP559016994, TQ, TP);
Chris@10 130 }
Chris@10 131 }
/* Remaining ii outputs, written over the inputs in pairs (4, 1) and (3, 2). */
Chris@10 132 ii[WS(rs, 4)] = FMA(KP951056516, TU, TR);
Chris@10 133 ii[WS(rs, 1)] = FNMS(KP951056516, TU, TR);
Chris@10 134 ii[WS(rs, 3)] = FNMS(KP951056516, TW, TV);
Chris@10 135 ii[WS(rs, 2)] = FMA(KP951056516, TW, TV);
Chris@10 136 }
Chris@10 137 }
Chris@10 138 }
Chris@10 139 }
Chris@10 140
/*
 * Twiddle instruction table referenced by desc: two TW_CEXP entries whose last
 * fields are 1 and 3, followed by one TW_NEXT entry.
 * NOTE(review): presumably this describes which factors are stored in W (the
 * kernel reads four values per iteration) - confirm against the tw_instr
 * definition, which is outside this file.
 */
Chris@10 141 static const tw_instr twinstr[] = {
Chris@10 142 {TW_CEXP, 0, 1},
Chris@10 143 {TW_CEXP, 0, 3},
Chris@10 144 {TW_NEXT, 1, 0}
Chris@10 145 };
Chris@10 146
/*
 * Descriptor passed to the registration call: 5, the name string "t2_5", the
 * twinstr table, &GENUS, and {14, 10, 30, 0}. The first three numbers match the
 * add / multiply / fused multiply-add counts in the generator comment for this
 * variant. NOTE(review): the field meanings come from ct_desc, which is
 * declared outside this file - confirm there.
 */
Chris@10 147 static const ct_desc desc = { 5, "t2_5", twinstr, &GENUS, {14, 10, 30, 0}, 0, 0, 0 };
Chris@10 148
/*
 * Entry point for this file: hands the t2_5 kernel and its descriptor to the
 * planner p through X(kdft_dit_register).
 * NOTE(review): X() is a macro defined outside this file; presumably it adds
 * the library's symbol prefix - confirm in the project headers.
 */
Chris@10 149 void X(codelet_t2_5) (planner *p) {
Chris@10 150 X(kdft_dit_register) (p, t2_5, &desc);
Chris@10 151 }
Chris@10 152 #else /* HAVE_FMA */
Chris@10 153
Chris@10 154 /* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -name t2_5 -include t.h */
Chris@10 155
Chris@10 156 /*
Chris@10 157 * This function contains 44 FP additions, 32 FP multiplications,
Chris@10 158 * (or, 30 additions, 18 multiplications, 14 fused multiply/add),
Chris@10 159 * 37 stack variables, 4 constants, and 20 memory accesses
Chris@10 160 */
Chris@10 161 #include "t.h"
Chris@10 162
/*
 * t2_5 (non-FMA variant): generated size-5 twiddle kernel that works in place.
 *
 * Each loop iteration reads five strided values from ri and five from ii
 * (offsets 0 and WS(rs, 1..4)), combines inputs 1..4 with multipliers built
 * from W[0..3], and writes ten results back to the same ri/ii locations.
 *
 * Parameters:
 *   ri, ii - arrays that are read and then overwritten; presumably the real
 *            and imaginary parts of the data (TODO confirm against caller).
 *   W      - multiplier table; four values are consumed per iteration and the
 *            pointer starts at W + mb * 4.
 *   rs     - stride handed to WS() to compute the element offsets.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - amount added to both ri and ii after every iteration.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are defined outside this file;
 * presumably they evaluate a * b + c and c - a * b respectively - confirm in
 * the project headers. DK presumably declares a named floating-point constant.
 */
Chris@10 163 static void t2_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 164 {
/* Numerically: KP559016994 ~ sqrt(5)/4, KP587785252 ~ sin(pi/5),
 * KP951056516 ~ sin(2*pi/5). */
Chris@10 165 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 166 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 167 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@10 168 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 169 {
Chris@10 170 INT m;
Chris@10 171 for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@10 172 E T2, T4, T7, T9, Tb, Tl, Tf, Tj;
Chris@10 173 {
Chris@10 174 E T8, Te, Ta, Td;
/* Load the four stored multiplier values, then form sums and differences of
 * their products: (Tb, Tf) multiply input 4 and (Tj, Tl) multiply input 2. */
Chris@10 175 T2 = W[0];
Chris@10 176 T4 = W[1];
Chris@10 177 T7 = W[2];
Chris@10 178 T9 = W[3];
Chris@10 179 T8 = T2 * T7;
Chris@10 180 Te = T4 * T7;
Chris@10 181 Ta = T4 * T9;
Chris@10 182 Td = T2 * T9;
Chris@10 183 Tb = T8 - Ta;
Chris@10 184 Tl = Td - Te;
Chris@10 185 Tf = Td + Te;
Chris@10 186 Tj = T8 + Ta;
Chris@10 187 }
Chris@10 188 {
Chris@10 189 E T1, TI, Ty, TB, TN, TM, TF, TG, TH, Ti, Tr, Ts;
Chris@10 190 T1 = ri[0];
Chris@10 191 TI = ii[0];
Chris@10 192 {
Chris@10 193 E T6, Tw, Tq, TA, Th, Tx, Tn, Tz;
Chris@10 194 {
Chris@10 195 E T3, T5, To, Tp;
/* Inputs 1 and 3 use the stored pairs (T2, T4) and (T7, T9) directly. */
Chris@10 196 T3 = ri[WS(rs, 1)];
Chris@10 197 T5 = ii[WS(rs, 1)];
Chris@10 198 T6 = FMA(T2, T3, T4 * T5);
Chris@10 199 Tw = FNMS(T4, T3, T2 * T5);
Chris@10 200 To = ri[WS(rs, 3)];
Chris@10 201 Tp = ii[WS(rs, 3)];
Chris@10 202 Tq = FMA(T7, To, T9 * Tp);
Chris@10 203 TA = FNMS(T9, To, T7 * Tp);
Chris@10 204 }
Chris@10 205 {
Chris@10 206 E Tc, Tg, Tk, Tm;
/* Inputs 4 and 2 use the derived pairs (Tb, Tf) and (Tj, Tl). */
Chris@10 207 Tc = ri[WS(rs, 4)];
Chris@10 208 Tg = ii[WS(rs, 4)];
Chris@10 209 Th = FMA(Tb, Tc, Tf * Tg);
Chris@10 210 Tx = FNMS(Tf, Tc, Tb * Tg);
Chris@10 211 Tk = ri[WS(rs, 2)];
Chris@10 212 Tm = ii[WS(rs, 2)];
Chris@10 213 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@10 214 Tz = FNMS(Tl, Tk, Tj * Tm);
Chris@10 215 }
/* Pairwise sums and differences of the multiplied inputs (1 with 4, 2 with 3). */
Chris@10 216 Ty = Tw - Tx;
Chris@10 217 TB = Tz - TA;
Chris@10 218 TN = Tn - Tq;
Chris@10 219 TM = T6 - Th;
Chris@10 220 TF = Tw + Tx;
Chris@10 221 TG = Tz + TA;
Chris@10 222 TH = TF + TG;
Chris@10 223 Ti = T6 + Th;
Chris@10 224 Tr = Tn + Tq;
Chris@10 225 Ts = Ti + Tr;
Chris@10 226 }
/* Output 0: input 0 plus the sum of the four multiplied terms on each side. */
Chris@10 227 ri[0] = T1 + Ts;
Chris@10 228 ii[0] = TH + TI;
Chris@10 229 {
Chris@10 230 E TC, TE, Tv, TD, Tt, Tu;
/* Remaining ri outputs, written over the inputs. */
Chris@10 231 TC = FMA(KP951056516, Ty, KP587785252 * TB);
Chris@10 232 TE = FNMS(KP587785252, Ty, KP951056516 * TB);
Chris@10 233 Tt = KP559016994 * (Ti - Tr);
Chris@10 234 Tu = FNMS(KP250000000, Ts, T1);
Chris@10 235 Tv = Tt + Tu;
Chris@10 236 TD = Tu - Tt;
Chris@10 237 ri[WS(rs, 4)] = Tv - TC;
Chris@10 238 ri[WS(rs, 3)] = TD + TE;
Chris@10 239 ri[WS(rs, 1)] = Tv + TC;
Chris@10 240 ri[WS(rs, 2)] = TD - TE;
Chris@10 241 }
Chris@10 242 {
Chris@10 243 E TO, TP, TL, TQ, TJ, TK;
/* Remaining ii outputs, written over the inputs. */
Chris@10 244 TO = FMA(KP951056516, TM, KP587785252 * TN);
Chris@10 245 TP = FNMS(KP587785252, TM, KP951056516 * TN);
Chris@10 246 TJ = KP559016994 * (TF - TG);
Chris@10 247 TK = FNMS(KP250000000, TH, TI);
Chris@10 248 TL = TJ + TK;
Chris@10 249 TQ = TK - TJ;
Chris@10 250 ii[WS(rs, 1)] = TL - TO;
Chris@10 251 ii[WS(rs, 3)] = TQ - TP;
Chris@10 252 ii[WS(rs, 4)] = TO + TL;
Chris@10 253 ii[WS(rs, 2)] = TP + TQ;
Chris@10 254 }
Chris@10 255 }
Chris@10 256 }
Chris@10 257 }
Chris@10 258 }
Chris@10 259
/*
 * Twiddle instruction table referenced by desc: two TW_CEXP entries whose last
 * fields are 1 and 3, followed by one TW_NEXT entry.
 * NOTE(review): presumably this describes which factors are stored in W (the
 * kernel reads four values per iteration) - confirm against the tw_instr
 * definition, which is outside this file.
 */
Chris@10 260 static const tw_instr twinstr[] = {
Chris@10 261 {TW_CEXP, 0, 1},
Chris@10 262 {TW_CEXP, 0, 3},
Chris@10 263 {TW_NEXT, 1, 0}
Chris@10 264 };
Chris@10 265
/*
 * Descriptor passed to the registration call: 5, the name string "t2_5", the
 * twinstr table, &GENUS, and {30, 18, 14, 0}. The first three numbers match the
 * add / multiply / fused multiply-add counts in the generator comment for this
 * variant. NOTE(review): the field meanings come from ct_desc, which is
 * declared outside this file - confirm there.
 */
Chris@10 266 static const ct_desc desc = { 5, "t2_5", twinstr, &GENUS, {30, 18, 14, 0}, 0, 0, 0 };
Chris@10 267
/*
 * Entry point for this file: hands the t2_5 kernel and its descriptor to the
 * planner p through X(kdft_dit_register).
 * NOTE(review): X() is a macro defined outside this file; presumably it adds
 * the library's symbol prefix - confirm in the project headers.
 */
Chris@10 268 void X(codelet_t2_5) (planner *p) {
Chris@10 269 X(kdft_dit_register) (p, t2_5, &desc);
Chris@10 270 }
Chris@10 271 #endif /* HAVE_FMA */