annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb_5.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:31 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -dif -name hb_5 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 40 FP additions, 34 FP multiplications,
Chris@82 32 * (or, 14 additions, 8 multiplications, 26 fused multiply/add),
Chris@82 33 * 27 stack variables, 4 constants, and 20 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb_5, FMA variant: machine-generated size-5 codelet (genfft flags
 * "-n 5 -dif -sign 1"), selected when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) it loads ten values -- cr[0], cr[WS(rs, 1..4)],
 * ci[0] and ci[WS(rs, 1..4)] -- combines them, and stores ten results back
 * to the same locations.
 *
 *   cr, ci  arrays read and overwritten in place; after each iteration
 *           cr advances by ms and ci retreats by ms
 *   W       read-only coefficient table (presumably twiddle factors --
 *           confirm against the caller); offset by (mb - 1) * 8 before the
 *           first iteration and advanced by 8 per iteration, so each
 *           iteration reads W[0..7]
 *   rs      stride handed to WS() to index within cr and ci
 *   mb, me  first (inclusive) and last (exclusive) value of m
 *   ms      per-iteration step applied to cr (+) and ci (-)
 *
 * FMA, FNMS, DK, WS, E and MAKE_VOLATILE_STRIDE come from the included
 * FFTW headers; the comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b -- TODO confirm against those headers.
 */
Chris@82 37 static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* The literals are approximately sin(2*pi/5), sqrt(5)/4, 1/4 and (sqrt(5) - 1)/2. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@82 46 E T1, Tb, TM, Tw, T8, Ta, Tn, Tj, TH, Ts, Tq, Tr;
/* First input group: cr[0], cr[1], cr[2], ci[0], ci[1] (indices via WS). */
Chris@82 47 {
Chris@82 48 E T4, Tu, T7, Tv;
Chris@82 49 T1 = cr[0];
Chris@82 50 {
Chris@82 51 E T2, T3, T5, T6;
Chris@82 52 T2 = cr[WS(rs, 1)];
Chris@82 53 T3 = ci[0];
Chris@82 54 T4 = T2 + T3;
Chris@82 55 Tu = T2 - T3;
Chris@82 56 T5 = cr[WS(rs, 2)];
Chris@82 57 T6 = ci[WS(rs, 1)];
Chris@82 58 T7 = T5 + T6;
Chris@82 59 Tv = T5 - T6;
Chris@82 60 }
Chris@82 61 Tb = T4 - T7;
Chris@82 62 TM = FNMS(KP618033988, Tu, Tv);
Chris@82 63 Tw = FMA(KP618033988, Tv, Tu);
Chris@82 64 T8 = T4 + T7;
Chris@82 65 Ta = FNMS(KP250000000, T8, T1);
Chris@82 66 }
/* Second input group: ci[4], ci[3], ci[2], cr[4], cr[3] (indices via WS). */
Chris@82 67 {
Chris@82 68 E Tf, To, Ti, Tp;
Chris@82 69 Tn = ci[WS(rs, 4)];
Chris@82 70 {
Chris@82 71 E Td, Te, Tg, Th;
Chris@82 72 Td = ci[WS(rs, 3)];
Chris@82 73 Te = cr[WS(rs, 4)];
Chris@82 74 Tf = Td + Te;
Chris@82 75 To = Td - Te;
Chris@82 76 Tg = ci[WS(rs, 2)];
Chris@82 77 Th = cr[WS(rs, 3)];
Chris@82 78 Ti = Tg + Th;
Chris@82 79 Tp = Tg - Th;
Chris@82 80 }
Chris@82 81 Tj = FMA(KP618033988, Ti, Tf);
Chris@82 82 TH = FNMS(KP618033988, Tf, Ti);
Chris@82 83 Ts = To - Tp;
Chris@82 84 Tq = To + Tp;
Chris@82 85 Tr = FNMS(KP250000000, Tq, Tn);
Chris@82 86 }
/*
 * All ten inputs are now held in locals, so the stores below cannot clobber
 * a value that this iteration still needs. Slot 0 is stored without any W
 * factor.
 */
Chris@82 87 cr[0] = T1 + T8;
Chris@82 88 ci[0] = Tn + Tq;
/*
 * Slots 1 and 4. With the assumed FMA/FNMS semantics each pair of stores is
 * the product (a + i*b) * (W[2k-2] + i*W[2k-1]): cr receives the real part,
 * ci the imaginary part.
 */
Chris@82 89 {
Chris@82 90 E Tk, TA, Tx, TD, Tc, Tt;
Chris@82 91 Tc = FMA(KP559016994, Tb, Ta);
Chris@82 92 Tk = FNMS(KP951056516, Tj, Tc);
Chris@82 93 TA = FMA(KP951056516, Tj, Tc);
Chris@82 94 Tt = FMA(KP559016994, Ts, Tr);
Chris@82 95 Tx = FMA(KP951056516, Tw, Tt);
Chris@82 96 TD = FNMS(KP951056516, Tw, Tt);
Chris@82 97 {
Chris@82 98 E T9, Tl, Tm, Ty;
Chris@82 99 T9 = W[0];
Chris@82 100 Tl = T9 * Tk;
Chris@82 101 Tm = W[1];
Chris@82 102 Ty = Tm * Tk;
Chris@82 103 cr[WS(rs, 1)] = FNMS(Tm, Tx, Tl);
Chris@82 104 ci[WS(rs, 1)] = FMA(T9, Tx, Ty);
Chris@82 105 }
Chris@82 106 {
Chris@82 107 E Tz, TB, TC, TE;
Chris@82 108 Tz = W[6];
Chris@82 109 TB = Tz * TA;
Chris@82 110 TC = W[7];
Chris@82 111 TE = TC * TA;
Chris@82 112 cr[WS(rs, 4)] = FNMS(TC, TD, TB);
Chris@82 113 ci[WS(rs, 4)] = FMA(Tz, TD, TE);
Chris@82 114 }
Chris@82 115 }
/* Slots 2 and 3, same store pattern using W[2..3] and W[4..5]. */
Chris@82 116 {
Chris@82 117 E TI, TQ, TN, TT, TG, TL;
Chris@82 118 TG = FNMS(KP559016994, Tb, Ta);
Chris@82 119 TI = FMA(KP951056516, TH, TG);
Chris@82 120 TQ = FNMS(KP951056516, TH, TG);
Chris@82 121 TL = FNMS(KP559016994, Ts, Tr);
Chris@82 122 TN = FNMS(KP951056516, TM, TL);
Chris@82 123 TT = FMA(KP951056516, TM, TL);
Chris@82 124 {
Chris@82 125 E TF, TJ, TK, TO;
Chris@82 126 TF = W[2];
Chris@82 127 TJ = TF * TI;
Chris@82 128 TK = W[3];
Chris@82 129 TO = TK * TI;
Chris@82 130 cr[WS(rs, 2)] = FNMS(TK, TN, TJ);
Chris@82 131 ci[WS(rs, 2)] = FMA(TF, TN, TO);
Chris@82 132 }
Chris@82 133 {
Chris@82 134 E TP, TR, TS, TU;
Chris@82 135 TP = W[4];
Chris@82 136 TR = TP * TQ;
Chris@82 137 TS = W[5];
Chris@82 138 TU = TS * TQ;
Chris@82 139 cr[WS(rs, 3)] = FNMS(TS, TT, TR);
Chris@82 140 ci[WS(rs, 3)] = FMA(TP, TT, TU);
Chris@82 141 }
Chris@82 142 }
Chris@82 143 }
Chris@82 144 }
Chris@82 145 }
Chris@82 146
/*
 * Two-entry tw_instr table referenced by desc below. The TW_FULL entry
 * carries 5, matching the codelet size; the TW_NEXT entry looks like the
 * list terminator. The exact field meanings live in the FFTW headers --
 * TODO confirm there.
 */
Chris@82 147 static const tw_instr twinstr[] = {
Chris@82 148 {TW_FULL, 1, 5},
Chris@82 149 {TW_NEXT, 1, 0}
Chris@82 150 };
Chris@82 151
/*
 * Descriptor for the FMA variant: size 5, name "hb_5", the twinstr table and
 * &GENUS. The trailing {14, 8, 26, 0} matches the addition / multiplication /
 * fused multiply-add counts quoted in the generator comment above this
 * variant; the meaning of the fourth field is not visible here.
 */
Chris@82 152 static const hc2hc_desc desc = { 5, "hb_5", twinstr, &GENUS, {14, 8, 26, 0} };
Chris@82 153
/*
 * Registration entry point: hands the FMA variant of hb_5 and its descriptor
 * to planner p through X(khc2hc_register). X() is a macro defined outside
 * this file (presumably a symbol-name wrapper -- confirm in the FFTW headers).
 */
Chris@82 154 void X(codelet_hb_5) (planner *p) {
Chris@82 155 X(khc2hc_register) (p, hb_5, &desc);
Chris@82 156 }
Chris@82 157 #else
Chris@82 158
Chris@82 159 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 5 -dif -name hb_5 -include rdft/scalar/hb.h */
Chris@82 160
Chris@82 161 /*
Chris@82 162 * This function contains 40 FP additions, 28 FP multiplications,
Chris@82 163 * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
Chris@82 164 * 27 stack variables, 4 constants, and 20 memory accesses
Chris@82 165 */
Chris@82 166 #include "rdft/scalar/hb.h"
Chris@82 167
/*
 * hb_5, non-FMA variant: machine-generated size-5 codelet (genfft flags
 * "-n 5 -dif -sign 1", without "-fma"), compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) it loads ten values -- cr[0], cr[WS(rs, 1..4)],
 * ci[0] and ci[WS(rs, 1..4)] -- combines them, and stores ten results back
 * to the same locations.
 *
 *   cr, ci  arrays read and overwritten in place; after each iteration
 *           cr advances by ms and ci retreats by ms
 *   W       read-only coefficient table (presumably twiddle factors --
 *           confirm against the caller); offset by (mb - 1) * 8 before the
 *           first iteration and advanced by 8 per iteration, so each
 *           iteration reads W[0..7]
 *   rs      stride handed to WS() to index within cr and ci
 *   mb, me  first (inclusive) and last (exclusive) value of m
 *   ms      per-iteration step applied to cr (+) and ci (-)
 *
 * FMA, FNMS, DK, WS, E and MAKE_VOLATILE_STRIDE come from the included
 * FFTW headers; the comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b -- TODO confirm against those headers.
 */
Chris@82 168 static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 169 {
/* The literals are approximately 1/4, sin(pi/5), sin(2*pi/5) and sqrt(5)/4. */
Chris@82 170 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 171 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 172 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 173 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 174 {
Chris@82 175 INT m;
Chris@82 176 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@82 177 E T1, Tj, TG, Ts, T8, Ti, T9, Tn, TD, Tu, Tg, Tt;
/* First input group: cr[0], cr[1], cr[2], ci[0], ci[1] (indices via WS). */
Chris@82 178 {
Chris@82 179 E T4, Tq, T7, Tr;
Chris@82 180 T1 = cr[0];
Chris@82 181 {
Chris@82 182 E T2, T3, T5, T6;
Chris@82 183 T2 = cr[WS(rs, 1)];
Chris@82 184 T3 = ci[0];
Chris@82 185 T4 = T2 + T3;
Chris@82 186 Tq = T2 - T3;
Chris@82 187 T5 = cr[WS(rs, 2)];
Chris@82 188 T6 = ci[WS(rs, 1)];
Chris@82 189 T7 = T5 + T6;
Chris@82 190 Tr = T5 - T6;
Chris@82 191 }
Chris@82 192 Tj = KP559016994 * (T4 - T7);
Chris@82 193 TG = FMA(KP951056516, Tq, KP587785252 * Tr);
Chris@82 194 Ts = FNMS(KP951056516, Tr, KP587785252 * Tq);
Chris@82 195 T8 = T4 + T7;
Chris@82 196 Ti = FNMS(KP250000000, T8, T1);
Chris@82 197 }
/* Second input group: ci[4], ci[3], ci[2], cr[4], cr[3] (indices via WS). */
Chris@82 198 {
Chris@82 199 E Tc, Tl, Tf, Tm;
Chris@82 200 T9 = ci[WS(rs, 4)];
Chris@82 201 {
Chris@82 202 E Ta, Tb, Td, Te;
Chris@82 203 Ta = ci[WS(rs, 3)];
Chris@82 204 Tb = cr[WS(rs, 4)];
Chris@82 205 Tc = Ta - Tb;
Chris@82 206 Tl = Ta + Tb;
Chris@82 207 Td = ci[WS(rs, 2)];
Chris@82 208 Te = cr[WS(rs, 3)];
Chris@82 209 Tf = Td - Te;
Chris@82 210 Tm = Td + Te;
Chris@82 211 }
Chris@82 212 Tn = FNMS(KP951056516, Tm, KP587785252 * Tl);
Chris@82 213 TD = FMA(KP951056516, Tl, KP587785252 * Tm);
Chris@82 214 Tu = KP559016994 * (Tc - Tf);
Chris@82 215 Tg = Tc + Tf;
Chris@82 216 Tt = FNMS(KP250000000, Tg, T9);
Chris@82 217 }
/*
 * All ten inputs are now held in locals, so the stores below cannot clobber
 * a value that this iteration still needs. Slot 0 is stored without any W
 * factor.
 */
Chris@82 218 cr[0] = T1 + T8;
Chris@82 219 ci[0] = T9 + Tg;
/*
 * Slots 2 and 3. With the assumed FMA/FNMS semantics each pair of stores is
 * the product (a + i*b) * (W[2k-2] + i*W[2k-1]): cr receives the real part,
 * ci the imaginary part.
 */
Chris@82 220 {
Chris@82 221 E To, Ty, Tw, TA, Tk, Tv;
Chris@82 222 Tk = Ti - Tj;
Chris@82 223 To = Tk - Tn;
Chris@82 224 Ty = Tk + Tn;
Chris@82 225 Tv = Tt - Tu;
Chris@82 226 Tw = Ts + Tv;
Chris@82 227 TA = Tv - Ts;
Chris@82 228 {
Chris@82 229 E Th, Tp, Tx, Tz;
Chris@82 230 Th = W[2];
Chris@82 231 Tp = W[3];
Chris@82 232 cr[WS(rs, 2)] = FNMS(Tp, Tw, Th * To);
Chris@82 233 ci[WS(rs, 2)] = FMA(Th, Tw, Tp * To);
Chris@82 234 Tx = W[4];
Chris@82 235 Tz = W[5];
Chris@82 236 cr[WS(rs, 3)] = FNMS(Tz, TA, Tx * Ty);
Chris@82 237 ci[WS(rs, 3)] = FMA(Tx, TA, Tz * Ty);
Chris@82 238 }
Chris@82 239 }
/* Slots 1 and 4, same store pattern using W[0..1] and W[6..7]. */
Chris@82 240 {
Chris@82 241 E TE, TK, TI, TM, TC, TH;
Chris@82 242 TC = Tj + Ti;
Chris@82 243 TE = TC - TD;
Chris@82 244 TK = TC + TD;
Chris@82 245 TH = Tu + Tt;
Chris@82 246 TI = TG + TH;
Chris@82 247 TM = TH - TG;
Chris@82 248 {
Chris@82 249 E TB, TF, TJ, TL;
Chris@82 250 TB = W[0];
Chris@82 251 TF = W[1];
Chris@82 252 cr[WS(rs, 1)] = FNMS(TF, TI, TB * TE);
Chris@82 253 ci[WS(rs, 1)] = FMA(TB, TI, TF * TE);
Chris@82 254 TJ = W[6];
Chris@82 255 TL = W[7];
Chris@82 256 cr[WS(rs, 4)] = FNMS(TL, TM, TJ * TK);
Chris@82 257 ci[WS(rs, 4)] = FMA(TJ, TM, TL * TK);
Chris@82 258 }
Chris@82 259 }
Chris@82 260 }
Chris@82 261 }
Chris@82 262 }
Chris@82 263
/*
 * Two-entry tw_instr table referenced by desc below. The TW_FULL entry
 * carries 5, matching the codelet size; the TW_NEXT entry looks like the
 * list terminator. The exact field meanings live in the FFTW headers --
 * TODO confirm there.
 */
Chris@82 264 static const tw_instr twinstr[] = {
Chris@82 265 {TW_FULL, 1, 5},
Chris@82 266 {TW_NEXT, 1, 0}
Chris@82 267 };
Chris@82 268
/*
 * Descriptor for the non-FMA variant: size 5, name "hb_5", the twinstr table
 * and &GENUS. The trailing {26, 14, 14, 0} matches the addition /
 * multiplication / fused multiply-add counts quoted in the generator comment
 * above this variant; the meaning of the fourth field is not visible here.
 */
Chris@82 269 static const hc2hc_desc desc = { 5, "hb_5", twinstr, &GENUS, {26, 14, 14, 0} };
Chris@82 270
/*
 * Registration entry point: hands the non-FMA variant of hb_5 and its
 * descriptor to planner p through X(khc2hc_register). X() is a macro defined
 * outside this file (presumably a symbol-name wrapper -- confirm in the FFTW
 * headers).
 */
Chris@82 271 void X(codelet_hb_5) (planner *p) {
Chris@82 272 X(khc2hc_register) (p, hb_5, &desc);
Chris@82 273 }
Chris@82 274 #endif