annotate src/fftw-3.3.5/rdft/scalar/r2cb/hb_5.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:40 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -dif -name hb_5 -include hb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 40 FP additions, 34 FP multiplications,
Chris@42 32 * (or, 14 additions, 8 multiplications, 26 fused multiply/add),
Chris@42 33 * 42 stack variables, 4 constants, and 20 memory accesses
Chris@42 34 */
Chris@42 35 #include "hb.h"
Chris@42 36
/*
 * hb_5 (variant compiled when HAVE_FMA is defined).
 *
 * Size-5 codelet emitted by genfft's gen_hc2hc; the generator command line
 * is recorded in the comment that precedes this function.
 *
 * cr, ci : data arrays, processed in place. Each loop pass reads
 *          cr[0], cr[WS(rs, 1..4)], ci[0], ci[WS(rs, 1..4)] and then
 *          overwrites those same ten locations.
 * W      : table of multipliers; W[0]..W[7] are consumed per pass.
 * rs     : stride passed to WS() to locate elements 1..4.
 * mb, me : loop bounds; the body runs for m = mb, ..., me - 1.
 * ms     : per-pass step; cr moves by +ms and ci by -ms.
 *
 * NOTE(review): FMA/FNMS are defined outside this file. The comments
 * below assume FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b;
 * confirm against the project headers.
 */
Chris@42 37 static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Numerically: KP951056516 = sin(2*pi/5), KP559016994 = sqrt(5)/4,
 * KP618033988 = (sqrt(5) - 1)/2. DK is a project macro (not visible here). */
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 43 {
Chris@42 44 INT m;
/* W is pre-offset by (mb - 1) * 8 before the first pass. Every pass then
 * advances cr by +ms, ci by -ms and W by 8 entries. MAKE_VOLATILE_STRIDE
 * is a project macro evaluated in the increment clause (definition not
 * visible here). */
Chris@42 45 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) {
/* Declared at loop scope because they are assigned inside the nested
 * blocks below but consumed by the final stores for element 3. */
Chris@42 46 E TQ, TP, TT, TR, TS, TU;
Chris@42 47 {
Chris@42 48 E T1, Tn, TM, Tw, Tb, T8, To, Tf, Ta, Tg, Th;
Chris@42 49 {
Chris@42 50 E T2, T3, T5, T6, T4, Tu;
/* Input loads. All ten inputs (T1, T2, T3, T5, T6, Tn here; Td, Te, Tg, Th
 * a few lines below) are read before the first store to cr/ci. */
Chris@42 51 T1 = cr[0];
Chris@42 52 T2 = cr[WS(rs, 1)];
Chris@42 53 T3 = ci[0];
Chris@42 54 T5 = cr[WS(rs, 2)];
Chris@42 55 T6 = ci[WS(rs, 1)];
Chris@42 56 Tn = ci[WS(rs, 4)];
Chris@42 57 T4 = T2 + T3;
Chris@42 58 Tu = T2 - T3;
Chris@42 59 {
Chris@42 60 E T7, Tv, Td, Te;
Chris@42 61 T7 = T5 + T6;
Chris@42 62 Tv = T5 - T6;
Chris@42 63 Td = ci[WS(rs, 3)];
Chris@42 64 Te = cr[WS(rs, 4)];
Chris@42 65 TM = FNMS(KP618033988, Tu, Tv);
Chris@42 66 Tw = FMA(KP618033988, Tv, Tu);
Chris@42 67 Tb = T4 - T7;
Chris@42 68 T8 = T4 + T7;
Chris@42 69 To = Td - Te;
Chris@42 70 Tf = Td + Te;
Chris@42 71 Ta = FNMS(KP250000000, T8, T1);
Chris@42 72 Tg = ci[WS(rs, 2)];
Chris@42 73 Th = cr[WS(rs, 3)];
Chris@42 74 }
Chris@42 75 }
/* Element 0, cr part: T1 + (T2 + T3 + T5 + T6); no W factor is applied. */
Chris@42 76 cr[0] = T1 + T8;
Chris@42 77 {
Chris@42 78 E TG, T9, Tm, Tz, TH, TC, TA, Tk, Tt, TL, Tc, Ti, Tp, TI;
Chris@42 79 TG = FNMS(KP559016994, Tb, Ta);
Chris@42 80 Tc = FMA(KP559016994, Tb, Ta);
/* (T9, Tm) = (W[0], W[1]) is the multiplier pair for element 1;
 * (Tz, TC) = (W[6], W[7]) below is the pair for element 4. */
Chris@42 81 T9 = W[0];
Chris@42 82 Ti = Tg + Th;
Chris@42 83 Tp = Tg - Th;
Chris@42 84 Tm = W[1];
Chris@42 85 {
Chris@42 86 E Ts, Tj, Tr, Tq;
Chris@42 87 Tz = W[6];
Chris@42 88 Ts = To - Tp;
Chris@42 89 Tq = To + Tp;
Chris@42 90 Tj = FMA(KP618033988, Ti, Tf);
Chris@42 91 TH = FNMS(KP618033988, Tf, Ti);
/* Element 0, ci part: Tn + Tq with Tq = (Td - Te) + (Tg - Th); no W factor. */
Chris@42 92 ci[0] = Tn + Tq;
Chris@42 93 Tr = FNMS(KP250000000, Tq, Tn);
Chris@42 94 TC = W[7];
Chris@42 95 TA = FMA(KP951056516, Tj, Tc);
Chris@42 96 Tk = FNMS(KP951056516, Tj, Tc);
Chris@42 97 Tt = FMA(KP559016994, Ts, Tr);
Chris@42 98 TL = FNMS(KP559016994, Ts, Tr);
Chris@42 99 }
Chris@42 100 {
Chris@42 101 E TE, TB, Ty, Tl, TD, Tx;
Chris@42 102 TE = TC * TA;
Chris@42 103 TB = Tz * TA;
Chris@42 104 Ty = Tm * Tk;
Chris@42 105 Tl = T9 * Tk;
Chris@42 106 TD = FNMS(KP951056516, Tw, Tt);
Chris@42 107 Tx = FMA(KP951056516, Tw, Tt);
Chris@42 108 TI = FMA(KP951056516, TH, TG);
Chris@42 109 TQ = FNMS(KP951056516, TH, TG);
/* Elements 4 and 1. Under the assumed FMA/FNMS semantics each cr/ci pair is
 * a complex product: cr = w0*a - w1*b, ci = w0*b + w1*a, with
 * (a, b, w0, w1) = (TA, TD, W[6], W[7]) for element 4 and
 * (Tk, Tx, W[0], W[1]) for element 1. */
Chris@42 110 ci[WS(rs, 4)] = FMA(Tz, TD, TE);
Chris@42 111 cr[WS(rs, 4)] = FNMS(TC, TD, TB);
Chris@42 112 ci[WS(rs, 1)] = FMA(T9, Tx, Ty);
Chris@42 113 cr[WS(rs, 1)] = FNMS(Tm, Tx, Tl);
Chris@42 114 }
Chris@42 115 {
Chris@42 116 E TF, TK, TN, TJ, TO;
Chris@42 117 TF = W[2];
Chris@42 118 TK = W[3];
Chris@42 119 TP = W[4];
Chris@42 120 TT = FMA(KP951056516, TM, TL);
Chris@42 121 TN = FNMS(KP951056516, TM, TL);
Chris@42 122 TJ = TF * TI;
Chris@42 123 TO = TK * TI;
Chris@42 124 TR = TP * TQ;
Chris@42 125 TS = W[5];
/* Element 2: same product pattern with (a, b, w0, w1) = (TI, TN, W[2], W[3]). */
Chris@42 126 cr[WS(rs, 2)] = FNMS(TK, TN, TJ);
Chris@42 127 ci[WS(rs, 2)] = FMA(TF, TN, TO);
Chris@42 128 }
Chris@42 129 }
Chris@42 130 }
/* Element 3: same product pattern with (a, b, w0, w1) = (TQ, TT, W[4], W[5]). */
Chris@42 131 cr[WS(rs, 3)] = FNMS(TS, TT, TR);
Chris@42 132 TU = TS * TQ;
Chris@42 133 ci[WS(rs, 3)] = FMA(TP, TT, TU);
Chris@42 134 }
Chris@42 135 }
Chris@42 136 }
Chris@42 137
/*
 * Two-entry tw_instr table referenced by the hc2hc_desc initializer
 * that follows it in this file.
 * NOTE(review): the field meanings come from the tw_instr type in the
 * project headers (not visible here). Presumably {TW_FULL, 1, 5} requests
 * the full multiplier set for size 5 and the TW_NEXT entry ends the
 * list; confirm.
 */
Chris@42 138 static const tw_instr twinstr[] = {
Chris@42 139 {TW_FULL, 1, 5},
Chris@42 140 {TW_NEXT, 1, 0}
Chris@42 141 };
Chris@42 142
/*
 * Descriptor handed to X(khc2hc_register) together with hb_5. It holds the
 * value 5, the name string "hb_5", the twinstr table, &GENUS and four
 * counts. The first three counts (14, 8, 26) equal the additions,
 * multiplications and fused multiply/adds reported in the generated header
 * comment of this HAVE_FMA variant.
 * NOTE(review): field names/semantics are defined by hc2hc_desc elsewhere.
 */
Chris@42 143 static const hc2hc_desc desc = { 5, "hb_5", twinstr, &GENUS, {14, 8, 26, 0} };
Chris@42 144
/*
 * Registration entry point (HAVE_FMA variant): passes the planner p, the
 * hb_5 function and the address of desc to X(khc2hc_register).
 * X() is a project macro that wraps identifiers (definition not visible
 * here).
 */
Chris@42 145 void X(codelet_hb_5) (planner *p) {
Chris@42 146 X(khc2hc_register) (p, hb_5, &desc);
Chris@42 147 }
Chris@42 148 #else /* HAVE_FMA */
Chris@42 149
Chris@42 150 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -dif -name hb_5 -include hb.h */
Chris@42 151
Chris@42 152 /*
Chris@42 153 * This function contains 40 FP additions, 28 FP multiplications,
Chris@42 154 * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
Chris@42 155 * 27 stack variables, 4 constants, and 20 memory accesses
Chris@42 156 */
Chris@42 157 #include "hb.h"
Chris@42 158
/*
 * hb_5 (variant compiled when HAVE_FMA is not defined).
 *
 * Size-5 codelet emitted by genfft's gen_hc2hc; the generator command line
 * is recorded in the comment that precedes this function.
 *
 * cr, ci : data arrays, processed in place. Each loop pass reads
 *          cr[0], cr[WS(rs, 1..4)], ci[0], ci[WS(rs, 1..4)] and then
 *          overwrites those same ten locations.
 * W      : table of multipliers; W[0]..W[7] are consumed per pass.
 * rs     : stride passed to WS() to locate elements 1..4.
 * mb, me : loop bounds; the body runs for m = mb, ..., me - 1.
 * ms     : per-pass step; cr moves by +ms and ci by -ms.
 *
 * NOTE(review): FMA/FNMS are defined outside this file. The comments
 * below assume FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b;
 * confirm against the project headers.
 */
Chris@42 159 static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 160 {
/* Numerically: KP587785252 = sin(pi/5), KP951056516 = sin(2*pi/5),
 * KP559016994 = sqrt(5)/4. DK is a project macro (not visible here). */
Chris@42 161 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 162 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 163 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 164 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 165 {
Chris@42 166 INT m;
/* W is pre-offset by (mb - 1) * 8 before the first pass. Every pass then
 * advances cr by +ms, ci by -ms and W by 8 entries. MAKE_VOLATILE_STRIDE
 * is a project macro evaluated in the increment clause (definition not
 * visible here). */
Chris@42 167 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@42 168 E T1, Tj, TG, Ts, T8, Ti, T9, Tn, TD, Tu, Tg, Tt;
/* First half of the inputs: cr[0], cr[1], cr[2], ci[0], ci[1].
 * Forms the pair sums T4, T7 and differences Tq, Tr, then the combined
 * terms Tj, TG, Ts, T8 and Ti (T1 - T8/4 under the assumed FNMS). */
Chris@42 169 {
Chris@42 170 E T4, Tq, T7, Tr;
Chris@42 171 T1 = cr[0];
Chris@42 172 {
Chris@42 173 E T2, T3, T5, T6;
Chris@42 174 T2 = cr[WS(rs, 1)];
Chris@42 175 T3 = ci[0];
Chris@42 176 T4 = T2 + T3;
Chris@42 177 Tq = T2 - T3;
Chris@42 178 T5 = cr[WS(rs, 2)];
Chris@42 179 T6 = ci[WS(rs, 1)];
Chris@42 180 T7 = T5 + T6;
Chris@42 181 Tr = T5 - T6;
Chris@42 182 }
Chris@42 183 Tj = KP559016994 * (T4 - T7);
Chris@42 184 TG = FMA(KP951056516, Tq, KP587785252 * Tr);
Chris@42 185 Ts = FNMS(KP951056516, Tr, KP587785252 * Tq);
Chris@42 186 T8 = T4 + T7;
Chris@42 187 Ti = FNMS(KP250000000, T8, T1);
Chris@42 188 }
/* Second half of the inputs: ci[4], ci[3], ci[2], cr[4], cr[3].
 * Mirrors the block above and produces Tn, TD, Tu, Tg and Tt. */
Chris@42 189 {
Chris@42 190 E Tc, Tl, Tf, Tm;
Chris@42 191 T9 = ci[WS(rs, 4)];
Chris@42 192 {
Chris@42 193 E Ta, Tb, Td, Te;
Chris@42 194 Ta = ci[WS(rs, 3)];
Chris@42 195 Tb = cr[WS(rs, 4)];
Chris@42 196 Tc = Ta - Tb;
Chris@42 197 Tl = Ta + Tb;
Chris@42 198 Td = ci[WS(rs, 2)];
Chris@42 199 Te = cr[WS(rs, 3)];
Chris@42 200 Tf = Td - Te;
Chris@42 201 Tm = Td + Te;
Chris@42 202 }
Chris@42 203 Tn = FNMS(KP951056516, Tm, KP587785252 * Tl);
Chris@42 204 TD = FMA(KP951056516, Tl, KP587785252 * Tm);
Chris@42 205 Tu = KP559016994 * (Tc - Tf);
Chris@42 206 Tg = Tc + Tf;
Chris@42 207 Tt = FNMS(KP250000000, Tg, T9);
Chris@42 208 }
/* Element 0: plain sums, no W factor. All ten inputs have been loaded by
 * this point, so these are the first stores of the pass. */
Chris@42 209 cr[0] = T1 + T8;
Chris@42 210 ci[0] = T9 + Tg;
/* Elements 2 and 3. Under the assumed FMA/FNMS semantics each cr/ci pair is
 * a complex product: cr = w0*a - w1*b, ci = w0*b + w1*a, with
 * (a, b, w0, w1) = (To, Tw, W[2], W[3]) for element 2 and
 * (Ty, TA, W[4], W[5]) for element 3. */
Chris@42 211 {
Chris@42 212 E To, Ty, Tw, TA, Tk, Tv;
Chris@42 213 Tk = Ti - Tj;
Chris@42 214 To = Tk - Tn;
Chris@42 215 Ty = Tk + Tn;
Chris@42 216 Tv = Tt - Tu;
Chris@42 217 Tw = Ts + Tv;
Chris@42 218 TA = Tv - Ts;
Chris@42 219 {
Chris@42 220 E Th, Tp, Tx, Tz;
Chris@42 221 Th = W[2];
Chris@42 222 Tp = W[3];
Chris@42 223 cr[WS(rs, 2)] = FNMS(Tp, Tw, Th * To);
Chris@42 224 ci[WS(rs, 2)] = FMA(Th, Tw, Tp * To);
Chris@42 225 Tx = W[4];
Chris@42 226 Tz = W[5];
Chris@42 227 cr[WS(rs, 3)] = FNMS(Tz, TA, Tx * Ty);
Chris@42 228 ci[WS(rs, 3)] = FMA(Tx, TA, Tz * Ty);
Chris@42 229 }
Chris@42 230 }
/* Elements 1 and 4: same product pattern with
 * (a, b, w0, w1) = (TE, TI, W[0], W[1]) for element 1 and
 * (TK, TM, W[6], W[7]) for element 4. */
Chris@42 231 {
Chris@42 232 E TE, TK, TI, TM, TC, TH;
Chris@42 233 TC = Tj + Ti;
Chris@42 234 TE = TC - TD;
Chris@42 235 TK = TC + TD;
Chris@42 236 TH = Tu + Tt;
Chris@42 237 TI = TG + TH;
Chris@42 238 TM = TH - TG;
Chris@42 239 {
Chris@42 240 E TB, TF, TJ, TL;
Chris@42 241 TB = W[0];
Chris@42 242 TF = W[1];
Chris@42 243 cr[WS(rs, 1)] = FNMS(TF, TI, TB * TE);
Chris@42 244 ci[WS(rs, 1)] = FMA(TB, TI, TF * TE);
Chris@42 245 TJ = W[6];
Chris@42 246 TL = W[7];
Chris@42 247 cr[WS(rs, 4)] = FNMS(TL, TM, TJ * TK);
Chris@42 248 ci[WS(rs, 4)] = FMA(TJ, TM, TL * TK);
Chris@42 249 }
Chris@42 250 }
Chris@42 251 }
Chris@42 252 }
Chris@42 253 }
Chris@42 254
/*
 * Two-entry tw_instr table referenced by the hc2hc_desc initializer
 * that follows it in this file (non-FMA branch; same contents as the
 * HAVE_FMA branch).
 * NOTE(review): the field meanings come from the tw_instr type in the
 * project headers (not visible here). Presumably {TW_FULL, 1, 5} requests
 * the full multiplier set for size 5 and the TW_NEXT entry ends the
 * list; confirm.
 */
Chris@42 255 static const tw_instr twinstr[] = {
Chris@42 256 {TW_FULL, 1, 5},
Chris@42 257 {TW_NEXT, 1, 0}
Chris@42 258 };
Chris@42 259
/*
 * Descriptor handed to X(khc2hc_register) together with hb_5. It holds the
 * value 5, the name string "hb_5", the twinstr table, &GENUS and four
 * counts. The first three counts (26, 14, 14) equal the additions,
 * multiplications and fused multiply/adds reported in the generated header
 * comment of this non-FMA variant.
 * NOTE(review): field names/semantics are defined by hc2hc_desc elsewhere.
 */
Chris@42 260 static const hc2hc_desc desc = { 5, "hb_5", twinstr, &GENUS, {26, 14, 14, 0} };
Chris@42 261
/*
 * Registration entry point (non-FMA variant): passes the planner p, the
 * hb_5 function and the address of desc to X(khc2hc_register).
 * X() is a project macro that wraps identifiers (definition not visible
 * here).
 */
Chris@42 262 void X(codelet_hb_5) (planner *p) {
Chris@42 263 X(khc2hc_register) (p, hb_5, &desc);
Chris@42 264 }
Chris@42 265 #endif /* HAVE_FMA */