annotate src/fftw-3.3.5/rdft/scalar/r2cb/hb_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:41 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hb_6 -include hb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@42 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@42 33 * 45 stack variables, 2 constants, and 24 memory accesses
Chris@42 34 */
Chris@42 35 #include "hb.h"
Chris@42 36
/*
 * hb_6, HAVE_FMA variant: machine-generated size-6 codelet (see the
 * "Generated by" command line earlier in this file: -n 6 -dif -sign 1).
 *
 * For each m in [mb, me) it loads six values from cr and six from ci
 * (slots 0..5, addressed through WS(rs, k)), combines them, and stores
 * six values back to cr and six back to ci in place.
 *
 *   cr, ci  data arrays, updated in place; cr advances by +ms and ci by
 *           -ms after every iteration
 *   W       twiddle table, read-only; ten values (W[0]..W[9]) are used
 *           per iteration, and W is first offset by (mb - 1) * 10
 *   rs      stride handed to WS() to address the six slots
 *   mb, me  first iteration index and one-past-last iteration index
 *   ms      per-iteration pointer step for cr (added) and ci (subtracted)
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file; FMA(a, b, c) and
 * FNMS(a, b, c) presumably mean a*b + c and c - a*b -- confirm in the
 * included headers before relying on that reading.
 */
Chris@42 37 static void hb_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* The two numeric constants of the kernel: 0.866025403... (numerically sqrt(3)/2) and 0.5. */
Chris@42 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 41 {
Chris@42 42 INT m;
/* One pass per m: cr walks forward, ci walks backward, W moves on by 10 entries. */
Chris@42 43 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
/* Declared at loop scope because the slot-1 outputs are finished after the nested scopes close. */
Chris@42 44 E TK, TR, TB, TM, TL, TS;
Chris@42 45 {
Chris@42 46 E Td, TN, TO, TJ, Tn, Tk, TC, T3, Tr, T4, T5, T7, T8;
Chris@42 47 {
Chris@42 48 E TH, Tg, Tj, TI, Th, Ti, T1, T2;
Chris@42 49 {
Chris@42 50 E Tb, Tc, Te, Tf;
/* Load phase: slots 3, 4 and 5 of both arrays, then their sums and differences. */
Chris@42 51 Tb = ci[WS(rs, 5)];
Chris@42 52 Tc = cr[WS(rs, 3)];
Chris@42 53 Te = ci[WS(rs, 3)];
Chris@42 54 Tf = cr[WS(rs, 5)];
Chris@42 55 Th = ci[WS(rs, 4)];
Chris@42 56 Td = Tb - Tc;
Chris@42 57 TN = Tb + Tc;
Chris@42 58 Ti = cr[WS(rs, 4)];
Chris@42 59 TH = Te + Tf;
Chris@42 60 Tg = Te - Tf;
Chris@42 61 }
Chris@42 62 Tj = Th - Ti;
Chris@42 63 TI = Th + Ti;
Chris@42 64 T1 = cr[0];
Chris@42 65 T2 = ci[WS(rs, 2)];
Chris@42 66 TO = TH - TI;
Chris@42 67 TJ = TH + TI;
Chris@42 68 Tn = Tj - Tg;
Chris@42 69 Tk = Tg + Tj;
Chris@42 70 TC = T1 - T2;
Chris@42 71 T3 = T1 + T2;
Chris@42 72 Tr = FNMS(KP500000000, Tk, Td);
/* Remaining loads: cr slots 1 and 2, ci slots 0 and 1. */
Chris@42 73 T4 = cr[WS(rs, 2)];
Chris@42 74 T5 = ci[0];
Chris@42 75 T7 = ci[WS(rs, 1)];
Chris@42 76 T8 = cr[WS(rs, 1)];
Chris@42 77 }
Chris@42 78 {
Chris@42 79 E Tl, Tq, TQ, Ts, Ta, T10, TG;
/* All twelve inputs are in locals by now, so the stores from here on may overwrite the input slots. */
/* ci[0] is written without any twiddle factor. */
Chris@42 80 ci[0] = Td + Tk;
Chris@42 81 {
Chris@42 82 E T6, TD, T9, TE, TF;
Chris@42 83 T6 = T4 + T5;
Chris@42 84 TD = T4 - T5;
Chris@42 85 T9 = T7 + T8;
Chris@42 86 TE = T7 - T8;
Chris@42 87 Tl = W[2];
Chris@42 88 Tq = W[3];
Chris@42 89 TQ = TD - TE;
Chris@42 90 TF = TD + TE;
Chris@42 91 Ts = T6 - T9;
Chris@42 92 Ta = T6 + T9;
Chris@42 93 T10 = TC + TF;
Chris@42 94 TG = FNMS(KP500000000, TF, TC);
Chris@42 95 }
Chris@42 96 {
Chris@42 97 E T13, TP, Tz, TZ, Tw, T14, Tv, Ty;
Chris@42 98 {
Chris@42 99 E Tt, T12, T11, Tp, Tm, To, Tu;
Chris@42 100 T13 = TN + TO;
Chris@42 101 TP = FNMS(KP500000000, TO, TN);
/* cr[0] is likewise written without any twiddle factor. */
Chris@42 102 cr[0] = T3 + Ta;
Chris@42 103 Tm = FNMS(KP500000000, Ta, T3);
Chris@42 104 Tz = FMA(KP866025403, Ts, Tr);
Chris@42 105 Tt = FNMS(KP866025403, Ts, Tr);
Chris@42 106 TZ = W[4];
Chris@42 107 To = FNMS(KP866025403, Tn, Tm);
Chris@42 108 Tw = FMA(KP866025403, Tn, Tm);
Chris@42 109 Tu = Tl * Tt;
Chris@42 110 T12 = W[5];
Chris@42 111 T11 = TZ * T10;
Chris@42 112 Tp = Tl * To;
/* Slot 2 outputs combine (To, Tt) with W[2], W[3]; slot 3 outputs combine (T10, T13) with W[4], W[5]. */
Chris@42 113 ci[WS(rs, 2)] = FMA(Tq, To, Tu);
Chris@42 114 T14 = T12 * T10;
Chris@42 115 cr[WS(rs, 3)] = FNMS(T12, T13, T11);
Chris@42 116 cr[WS(rs, 2)] = FNMS(Tq, Tt, Tp);
Chris@42 117 }
Chris@42 118 ci[WS(rs, 3)] = FMA(TZ, T13, T14);
Chris@42 119 Tv = W[6];
Chris@42 120 Ty = W[7];
Chris@42 121 {
Chris@42 122 E TX, TT, TW, TV, TY, TU, TA, Tx;
Chris@42 123 TK = FNMS(KP866025403, TJ, TG);
Chris@42 124 TU = FMA(KP866025403, TJ, TG);
Chris@42 125 TA = Tv * Tz;
Chris@42 126 Tx = Tv * Tw;
Chris@42 127 TX = FNMS(KP866025403, TQ, TP);
Chris@42 128 TR = FMA(KP866025403, TQ, TP);
/* Slot 4 outputs combine (Tw, Tz) with W[6], W[7]. */
Chris@42 129 ci[WS(rs, 4)] = FMA(Ty, Tw, TA);
Chris@42 130 cr[WS(rs, 4)] = FNMS(Ty, Tz, Tx);
Chris@42 131 TT = W[8];
Chris@42 132 TW = W[9];
Chris@42 133 TB = W[0];
Chris@42 134 TV = TT * TU;
Chris@42 135 TY = TW * TU;
Chris@42 136 TM = W[1];
Chris@42 137 TL = TB * TK;
/* Slot 5 outputs combine (TU, TX) with W[8], W[9]. */
Chris@42 138 cr[WS(rs, 5)] = FNMS(TW, TX, TV);
Chris@42 139 ci[WS(rs, 5)] = FMA(TT, TX, TY);
Chris@42 140 }
Chris@42 141 }
Chris@42 142 }
Chris@42 143 }
/* Slot 1 outputs combine (TK, TR) with W[0], W[1]; they are the last stores of the iteration. */
Chris@42 144 cr[WS(rs, 1)] = FNMS(TM, TR, TL);
Chris@42 145 TS = TM * TK;
Chris@42 146 ci[WS(rs, 1)] = FMA(TB, TR, TS);
Chris@42 147 }
Chris@42 148 }
Chris@42 149 }
Chris@42 150
/*
 * Twiddle instruction table for this codelet; it is referenced by `desc`.
 * It holds two entries: {TW_FULL, 1, 6} followed by {TW_NEXT, 1, 0}.
 * NOTE(review): the meaning of the tw_instr fields and of TW_FULL / TW_NEXT
 * is defined outside this file. The 6 presumably is the transform size,
 * which would be consistent with hb_6 consuming 10 = 2 * (6 - 1) values of
 * W per iteration -- confirm against the tw_instr definition.
 */
Chris@42 151 static const tw_instr twinstr[] = {
Chris@42 152 {TW_FULL, 1, 6},
Chris@42 153 {TW_NEXT, 1, 0}
Chris@42 154 };
Chris@42 155
/*
 * Descriptor passed to the registration call: size 6, the name "hb_6", the
 * twiddle instruction table, &GENUS, and four counters. {24, 10, 22, 0}
 * matches the "24 additions, 10 multiplications, 22 fused multiply/add"
 * figures in the generated header comment of this branch; the role of the
 * fourth value is not visible here.
 * NOTE(review): hc2hc_desc and GENUS are declared outside this file.
 */
Chris@42 156 static const hc2hc_desc desc = { 6, "hb_6", twinstr, &GENUS, {24, 10, 22, 0} };
Chris@42 157
/*
 * Public entry point of this file: registers the static hb_6 kernel and its
 * descriptor with planner p through X(khc2hc_register).
 * NOTE(review): X() is a macro defined elsewhere (presumably a name-mangling
 * prefix) -- confirm in the project headers.
 */
Chris@42 158 void X(codelet_hb_6) (planner *p) {
Chris@42 159 X(khc2hc_register) (p, hb_6, &desc);
Chris@42 160 }
Chris@42 161 #else /* HAVE_FMA */
Chris@42 162
Chris@42 163 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hb_6 -include hb.h */
Chris@42 164
Chris@42 165 /*
Chris@42 166 * This function contains 46 FP additions, 28 FP multiplications,
Chris@42 167 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@42 168 * 27 stack variables, 2 constants, and 24 memory accesses
Chris@42 169 */
Chris@42 170 #include "hb.h"
Chris@42 171
/*
 * hb_6, variant compiled when HAVE_FMA is not defined: machine-generated
 * size-6 codelet (see the "Generated by" command line of this branch:
 * -n 6 -dif -sign 1, without -fma).
 *
 * For each m in [mb, me) it loads six values from cr and six from ci
 * (slots 0..5, addressed through WS(rs, k)), combines them, and stores
 * six values back to cr and six back to ci in place.
 *
 *   cr, ci  data arrays, updated in place; cr advances by +ms and ci by
 *           -ms after every iteration
 *   W       twiddle table, read-only; ten values (W[0]..W[9]) are used
 *           per iteration, and W is first offset by (mb - 1) * 10
 *   rs      stride handed to WS() to address the six slots
 *   mb, me  first iteration index and one-past-last iteration index
 *   ms      per-iteration pointer step for cr (added) and ci (subtracted)
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file; FMA(a, b, c) and
 * FNMS(a, b, c) presumably mean a*b + c and c - a*b -- confirm in the
 * included headers before relying on that reading.
 */
Chris@42 172 static void hb_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 173 {
/* The two numeric constants of the kernel: 0.5 and 0.866025403... (numerically sqrt(3)/2). */
Chris@42 174 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 175 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 176 {
Chris@42 177 INT m;
/* One pass per m: cr walks forward, ci walks backward, W moves on by 10 entries. */
Chris@42 178 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
/* Intermediates that outlive the load phase below. */
Chris@42 179 E T3, Ty, Ta, TO, Tr, TB, Td, TE, Tk, TL, Tn, TH;
Chris@42 180 {
Chris@42 181 E T1, T2, Tb, Tc;
/* Load phase, first half: cr[0] with ci slot 2, then cr slots 1, 2 with ci slots 0, 1. */
Chris@42 182 T1 = cr[0];
Chris@42 183 T2 = ci[WS(rs, 2)];
Chris@42 184 T3 = T1 + T2;
Chris@42 185 Ty = T1 - T2;
Chris@42 186 {
Chris@42 187 E T6, Tz, T9, TA;
Chris@42 188 {
Chris@42 189 E T4, T5, T7, T8;
Chris@42 190 T4 = cr[WS(rs, 2)];
Chris@42 191 T5 = ci[0];
Chris@42 192 T6 = T4 + T5;
Chris@42 193 Tz = T4 - T5;
Chris@42 194 T7 = ci[WS(rs, 1)];
Chris@42 195 T8 = cr[WS(rs, 1)];
Chris@42 196 T9 = T7 + T8;
Chris@42 197 TA = T7 - T8;
Chris@42 198 }
Chris@42 199 Ta = T6 + T9;
Chris@42 200 TO = KP866025403 * (Tz - TA);
Chris@42 201 Tr = KP866025403 * (T6 - T9);
Chris@42 202 TB = Tz + TA;
Chris@42 203 }
/* Load phase, second half: slots 3, 4 and 5 of both arrays. */
Chris@42 204 Tb = ci[WS(rs, 5)];
Chris@42 205 Tc = cr[WS(rs, 3)];
Chris@42 206 Td = Tb - Tc;
Chris@42 207 TE = Tb + Tc;
Chris@42 208 {
Chris@42 209 E Tg, TG, Tj, TF;
Chris@42 210 {
Chris@42 211 E Te, Tf, Th, Ti;
Chris@42 212 Te = ci[WS(rs, 3)];
Chris@42 213 Tf = cr[WS(rs, 5)];
Chris@42 214 Tg = Te - Tf;
Chris@42 215 TG = Te + Tf;
Chris@42 216 Th = ci[WS(rs, 4)];
Chris@42 217 Ti = cr[WS(rs, 4)];
Chris@42 218 Tj = Th - Ti;
Chris@42 219 TF = Th + Ti;
Chris@42 220 }
Chris@42 221 Tk = Tg + Tj;
Chris@42 222 TL = KP866025403 * (TG + TF);
Chris@42 223 Tn = KP866025403 * (Tj - Tg);
Chris@42 224 TH = TF - TG;
Chris@42 225 }
Chris@42 226 }
/* All twelve inputs are in locals by now, so the stores from here on may overwrite the input slots. */
/* Slot 0 of both arrays is written without any twiddle factor. */
Chris@42 227 cr[0] = T3 + Ta;
Chris@42 228 ci[0] = Td + Tk;
Chris@42 229 {
Chris@42 230 E TC, TI, Tx, TD;
/* Slot 3 outputs combine (TC, TI) with W[4], W[5]. */
Chris@42 231 TC = Ty + TB;
Chris@42 232 TI = TE - TH;
Chris@42 233 Tx = W[4];
Chris@42 234 TD = W[5];
Chris@42 235 cr[WS(rs, 3)] = FNMS(TD, TI, Tx * TC);
Chris@42 236 ci[WS(rs, 3)] = FMA(TD, TC, Tx * TI);
Chris@42 237 }
Chris@42 238 {
Chris@42 239 E To, Tu, Ts, Tw, Tm, Tq;
Chris@42 240 Tm = FNMS(KP500000000, Ta, T3);
Chris@42 241 To = Tm - Tn;
Chris@42 242 Tu = Tm + Tn;
Chris@42 243 Tq = FNMS(KP500000000, Tk, Td);
Chris@42 244 Ts = Tq - Tr;
Chris@42 245 Tw = Tr + Tq;
Chris@42 246 {
Chris@42 247 E Tl, Tp, Tt, Tv;
/* Slot 2 outputs combine (To, Ts) with W[2], W[3]; slot 4 outputs combine (Tu, Tw) with W[6], W[7]. */
Chris@42 248 Tl = W[2];
Chris@42 249 Tp = W[3];
Chris@42 250 cr[WS(rs, 2)] = FNMS(Tp, Ts, Tl * To);
Chris@42 251 ci[WS(rs, 2)] = FMA(Tl, Ts, Tp * To);
Chris@42 252 Tt = W[6];
Chris@42 253 Tv = W[7];
Chris@42 254 cr[WS(rs, 4)] = FNMS(Tv, Tw, Tt * Tu);
Chris@42 255 ci[WS(rs, 4)] = FMA(Tt, Tw, Tv * Tu);
Chris@42 256 }
Chris@42 257 }
Chris@42 258 {
Chris@42 259 E TM, TS, TQ, TU, TK, TP;
Chris@42 260 TK = FNMS(KP500000000, TB, Ty);
Chris@42 261 TM = TK - TL;
Chris@42 262 TS = TK + TL;
Chris@42 263 TP = FMA(KP500000000, TH, TE);
Chris@42 264 TQ = TO + TP;
Chris@42 265 TU = TP - TO;
Chris@42 266 {
Chris@42 267 E TJ, TN, TR, TT;
/* Slot 1 outputs combine (TM, TQ) with W[0], W[1]; slot 5 outputs combine (TS, TU) with W[8], W[9]. */
Chris@42 268 TJ = W[0];
Chris@42 269 TN = W[1];
Chris@42 270 cr[WS(rs, 1)] = FNMS(TN, TQ, TJ * TM);
Chris@42 271 ci[WS(rs, 1)] = FMA(TN, TM, TJ * TQ);
Chris@42 272 TR = W[8];
Chris@42 273 TT = W[9];
Chris@42 274 cr[WS(rs, 5)] = FNMS(TT, TU, TR * TS);
Chris@42 275 ci[WS(rs, 5)] = FMA(TT, TS, TR * TU);
Chris@42 276 }
Chris@42 277 }
Chris@42 278 }
Chris@42 279 }
Chris@42 280 }
Chris@42 281
/*
 * Twiddle instruction table for this codelet; it is referenced by `desc`.
 * It holds two entries: {TW_FULL, 1, 6} followed by {TW_NEXT, 1, 0}.
 * NOTE(review): the meaning of the tw_instr fields and of TW_FULL / TW_NEXT
 * is defined outside this file. The 6 presumably is the transform size,
 * which would be consistent with hb_6 consuming 10 = 2 * (6 - 1) values of
 * W per iteration -- confirm against the tw_instr definition.
 */
Chris@42 282 static const tw_instr twinstr[] = {
Chris@42 283 {TW_FULL, 1, 6},
Chris@42 284 {TW_NEXT, 1, 0}
Chris@42 285 };
Chris@42 286
/*
 * Descriptor passed to the registration call: size 6, the name "hb_6", the
 * twiddle instruction table, &GENUS, and four counters. {32, 14, 14, 0}
 * matches the "32 additions, 14 multiplications, 14 fused multiply/add"
 * figures in the generated header comment of this branch; the role of the
 * fourth value is not visible here.
 * NOTE(review): hc2hc_desc and GENUS are declared outside this file.
 */
Chris@42 287 static const hc2hc_desc desc = { 6, "hb_6", twinstr, &GENUS, {32, 14, 14, 0} };
Chris@42 288
/*
 * Public entry point of this file: registers the static hb_6 kernel and its
 * descriptor with planner p through X(khc2hc_register).
 * NOTE(review): X() is a macro defined elsewhere (presumably a name-mangling
 * prefix) -- confirm in the project headers.
 */
Chris@42 289 void X(codelet_hb_6) (planner *p) {
Chris@42 290 X(khc2hc_register) (p, hb_6, &desc);
Chris@42 291 }
Chris@42 292 #endif /* HAVE_FMA */