annotate src/fftw-3.3.8/rdft/scalar/r2cb/hb2_5.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:40 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 5 -dif -name hb2_5 -include rdft/scalar/hb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 44 FP additions, 40 FP multiplications,
Chris@82 32 * (or, 14 additions, 10 multiplications, 30 fused multiply/add),
Chris@82 33 * 37 stack variables, 4 constants, and 20 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hb.h"
Chris@82 36
/*
 * hb2_5 (FMA-preferring variant): in-place 5-point twiddle kernel emitted by
 * genfft's gen_hc2hc (see the generator command line earlier in this file).
 *
 * For every m in [mb, me) the loop body:
 *   - loads four twiddle values W[0..3];
 *   - reads ten inputs: cr[0], cr[WS(rs, 1..4)], ci[0], ci[WS(rs, 1..4)];
 *   - writes ten results back to those same ten locations.
 * Between iterations cr advances by +ms, ci by -ms and W by 4. Before the
 * first iteration W is offset by (mb - 1) * 4.
 *
 * Parameters:
 *   cr, ci - data arrays, read and overwritten in place
 *   W      - twiddle table; four values are consumed per iteration
 *   rs     - stride handed to WS() to index the five slots of cr / ci
 *   mb, me - half-open iteration range
 *   ms     - per-iteration pointer step (added to cr, subtracted from ci)
 *
 * NOTE(review): the comments below assume the usual FFTW macro meanings
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b; those macros are
 * defined outside this file -- confirm.
 */
Chris@82 37 static void hb2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numeric constants; the digits match sin(2*pi/5), sqrt(5)/4, 1/4 and (sqrt(5) - 1)/2. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@82 46 E T9, TB, Tz, Tm, TC, TO, TG, TJ, TA, TF;
/*
 * Twiddle setup. (T9, Tm) = (W[0], W[1]) and (Tz, TB) = (W[2], W[3]) are
 * used directly for slots 1 and 3. The pairs (TJ, TO) and (TC, TG) are
 * combinations of the products W[0]*W[2], W[0]*W[3], W[1]*W[2], W[1]*W[3]
 * and are used for slots 2 and 4 respectively.
 */
Chris@82 47 T9 = W[0];
Chris@82 48 TB = W[3];
Chris@82 49 Tz = W[2];
Chris@82 50 TA = T9 * Tz;
Chris@82 51 TF = T9 * TB;
Chris@82 52 Tm = W[1];
Chris@82 53 TC = FNMS(Tm, TB, TA);
Chris@82 54 TO = FNMS(Tm, Tz, TF);
Chris@82 55 TG = FMA(Tm, Tz, TF);
Chris@82 56 TJ = FMA(Tm, TB, TA);
Chris@82 57 {
Chris@82 58 E T1, Tb, TQ, Tw, T8, Ta, Tn, Tj, TL, Ts, Tq, Tr;
/* First input group: cr[0] plus sums/differences of (cr[1], ci[0]) and (cr[2], ci[1]). */
Chris@82 59 {
Chris@82 60 E T4, Tu, T7, Tv;
Chris@82 61 T1 = cr[0];
Chris@82 62 {
Chris@82 63 E T2, T3, T5, T6;
Chris@82 64 T2 = cr[WS(rs, 1)];
Chris@82 65 T3 = ci[0];
Chris@82 66 T4 = T2 + T3;
Chris@82 67 Tu = T2 - T3;
Chris@82 68 T5 = cr[WS(rs, 2)];
Chris@82 69 T6 = ci[WS(rs, 1)];
Chris@82 70 T7 = T5 + T6;
Chris@82 71 Tv = T5 - T6;
Chris@82 72 }
Chris@82 73 Tb = T4 - T7;
Chris@82 74 TQ = FNMS(KP618033988, Tu, Tv);
Chris@82 75 Tw = FMA(KP618033988, Tv, Tu);
Chris@82 76 T8 = T4 + T7;
Chris@82 77 Ta = FNMS(KP250000000, T8, T1);
Chris@82 78 }
/* Second input group: ci[4] plus sums/differences of (ci[3], cr[4]) and (ci[2], cr[3]). */
Chris@82 79 {
Chris@82 80 E Tf, To, Ti, Tp;
Chris@82 81 Tn = ci[WS(rs, 4)];
Chris@82 82 {
Chris@82 83 E Td, Te, Tg, Th;
Chris@82 84 Td = ci[WS(rs, 3)];
Chris@82 85 Te = cr[WS(rs, 4)];
Chris@82 86 Tf = Td + Te;
Chris@82 87 To = Td - Te;
Chris@82 88 Tg = ci[WS(rs, 2)];
Chris@82 89 Th = cr[WS(rs, 3)];
Chris@82 90 Ti = Tg + Th;
Chris@82 91 Tp = Tg - Th;
Chris@82 92 }
Chris@82 93 Tj = FMA(KP618033988, Ti, Tf);
Chris@82 94 TL = FNMS(KP618033988, Tf, Ti);
Chris@82 95 Ts = To - Tp;
Chris@82 96 Tq = To + Tp;
Chris@82 97 Tr = FNMS(KP250000000, Tq, Tn);
Chris@82 98 }
/* Slot 0 is stored as plain sums, with no twiddle multiplication. */
Chris@82 99 cr[0] = T1 + T8;
Chris@82 100 ci[0] = Tn + Tq;
/* Slots 1 and 4: the pair (Tk, Tx) is scaled by (T9, Tm), the pair (TD, TH) by (TC, TG). */
Chris@82 101 {
Chris@82 102 E Tk, TD, Tx, TH, Tc, Tt;
Chris@82 103 Tc = FMA(KP559016994, Tb, Ta);
Chris@82 104 Tk = FNMS(KP951056516, Tj, Tc);
Chris@82 105 TD = FMA(KP951056516, Tj, Tc);
Chris@82 106 Tt = FMA(KP559016994, Ts, Tr);
Chris@82 107 Tx = FMA(KP951056516, Tw, Tt);
Chris@82 108 TH = FNMS(KP951056516, Tw, Tt);
Chris@82 109 {
Chris@82 110 E Tl, Ty, TE, TI;
Chris@82 111 Tl = T9 * Tk;
Chris@82 112 cr[WS(rs, 1)] = FNMS(Tm, Tx, Tl);
Chris@82 113 Ty = Tm * Tk;
Chris@82 114 ci[WS(rs, 1)] = FMA(T9, Tx, Ty);
Chris@82 115 TE = TC * TD;
Chris@82 116 cr[WS(rs, 4)] = FNMS(TG, TH, TE);
Chris@82 117 TI = TG * TD;
Chris@82 118 ci[WS(rs, 4)] = FMA(TC, TH, TI);
Chris@82 119 }
Chris@82 120 }
/* Slots 2 and 3: the pair (TM, TR) is scaled by (TJ, TO), the pair (TT, TV) by (Tz, TB). */
Chris@82 121 {
Chris@82 122 E TM, TT, TR, TV, TK, TP;
Chris@82 123 TK = FNMS(KP559016994, Tb, Ta);
Chris@82 124 TM = FMA(KP951056516, TL, TK);
Chris@82 125 TT = FNMS(KP951056516, TL, TK);
Chris@82 126 TP = FNMS(KP559016994, Ts, Tr);
Chris@82 127 TR = FNMS(KP951056516, TQ, TP);
Chris@82 128 TV = FMA(KP951056516, TQ, TP);
Chris@82 129 {
Chris@82 130 E TN, TS, TU, TW;
Chris@82 131 TN = TJ * TM;
Chris@82 132 cr[WS(rs, 2)] = FNMS(TO, TR, TN);
Chris@82 133 TS = TO * TM;
Chris@82 134 ci[WS(rs, 2)] = FMA(TJ, TR, TS);
Chris@82 135 TU = Tz * TT;
Chris@82 136 cr[WS(rs, 3)] = FNMS(TB, TV, TU);
Chris@82 137 TW = TB * TT;
Chris@82 138 ci[WS(rs, 3)] = FMA(Tz, TV, TW);
Chris@82 139 }
Chris@82 140 }
Chris@82 141 }
Chris@82 142 }
Chris@82 143 }
Chris@82 144 }
Chris@82 145
/*
 * Twiddle instruction list for hb2_5: two TW_CEXP entries followed by a
 * TW_NEXT entry that ends the list.
 * NOTE(review): presumably the last field (1 and 3) selects which twiddle
 * powers are stored, giving the four R values hb2_5 reads from W per
 * iteration -- confirm against the tw_instr definition.
 */
Chris@82 146 static const tw_instr twinstr[] = {
Chris@82 147 {TW_CEXP, 1, 1},
Chris@82 148 {TW_CEXP, 1, 3},
Chris@82 149 {TW_NEXT, 1, 0}
Chris@82 150 };
Chris@82 151
/*
 * Codelet descriptor for the FMA variant: the value 5, the name "hb2_5", the
 * twiddle instruction list, the genus, and the counts {14, 10, 30, 0}, which
 * match the "14 additions, 10 multiplications, 30 fused multiply/add" figures
 * in this variant's generator comment.
 * NOTE(review): the leading 5 is presumably the transform size (-n 5) -- confirm
 * against the hc2hc_desc definition.
 */
Chris@82 152 static const hc2hc_desc desc = { 5, "hb2_5", twinstr, &GENUS, {14, 10, 30, 0} };
Chris@82 153
/*
 * Registration entry point: hands the hb2_5 kernel and its descriptor to
 * X(khc2hc_register) for the given planner p.
 */
Chris@82 154 void X(codelet_hb2_5) (planner *p) {
Chris@82 155 X(khc2hc_register) (p, hb2_5, &desc);
Chris@82 156 }
Chris@82 157 #else
Chris@82 158
Chris@82 159 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 5 -dif -name hb2_5 -include rdft/scalar/hb.h */
Chris@82 160
Chris@82 161 /*
Chris@82 162 * This function contains 44 FP additions, 32 FP multiplications,
Chris@82 163 * (or, 30 additions, 18 multiplications, 14 fused multiply/add),
Chris@82 164 * 33 stack variables, 4 constants, and 20 memory accesses
Chris@82 165 */
Chris@82 166 #include "rdft/scalar/hb.h"
Chris@82 167
/*
 * hb2_5 (non-FMA variant): in-place 5-point twiddle kernel emitted by
 * genfft's gen_hc2hc (see the generator command line earlier in this file).
 * It has the same signature, loop structure and memory footprint as the
 * FMA-preferring variant in the other preprocessor branch.
 *
 * For every m in [mb, me) the loop body:
 *   - loads four twiddle values W[0..3];
 *   - reads ten inputs: cr[0], cr[WS(rs, 1..4)], ci[0], ci[WS(rs, 1..4)];
 *   - writes ten results back to those same ten locations.
 * Between iterations cr advances by +ms, ci by -ms and W by 4. Before the
 * first iteration W is offset by (mb - 1) * 4.
 *
 * Parameters:
 *   cr, ci - data arrays, read and overwritten in place
 *   W      - twiddle table; four values are consumed per iteration
 *   rs     - stride handed to WS() to index the five slots of cr / ci
 *   mb, me - half-open iteration range
 *   ms     - per-iteration pointer step (added to cr, subtracted from ci)
 *
 * NOTE(review): FMA() and FNMS() are defined outside this file; the usual
 * FFTW meanings are FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b --
 * confirm.
 */
Chris@82 168 static void hb2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 169 {
/* Numeric constants; the digits match 1/4, sin(pi/5), sin(2*pi/5) and sqrt(5)/4. */
Chris@82 170 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 171 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 172 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 173 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 174 {
Chris@82 175 INT m;
Chris@82 176 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@82 177 E Th, Tk, Ti, Tl, Tn, TP, Tx, TN;
/*
 * Twiddle setup. (Th, Tk) = (W[0], W[1]) and (Ti, Tl) = (W[2], W[3]) are
 * used directly for slots 1 and 3. Derived pairs:
 *   (Tn, Tx) = (W0*W2 + W1*W3, W0*W3 - W1*W2), used for slot 2
 *   (TN, TP) = (W0*W2 - W1*W3, W0*W3 + W1*W2), used for slot 4
 */
Chris@82 178 {
Chris@82 179 E Tj, Tw, Tm, Tv;
Chris@82 180 Th = W[0];
Chris@82 181 Tk = W[1];
Chris@82 182 Ti = W[2];
Chris@82 183 Tl = W[3];
Chris@82 184 Tj = Th * Ti;
Chris@82 185 Tw = Tk * Ti;
Chris@82 186 Tm = Tk * Tl;
Chris@82 187 Tv = Th * Tl;
Chris@82 188 Tn = Tj + Tm;
Chris@82 189 TP = Tv + Tw;
Chris@82 190 Tx = Tv - Tw;
Chris@82 191 TN = Tj - Tm;
Chris@82 192 }
Chris@82 193 {
Chris@82 194 E T1, Tp, TK, TA, T8, To, T9, Tt, TI, TC, Tg, TB;
/* First input group: cr[0] plus sums/differences of (cr[1], ci[0]) and (cr[2], ci[1]). */
Chris@82 195 {
Chris@82 196 E T4, Ty, T7, Tz;
Chris@82 197 T1 = cr[0];
Chris@82 198 {
Chris@82 199 E T2, T3, T5, T6;
Chris@82 200 T2 = cr[WS(rs, 1)];
Chris@82 201 T3 = ci[0];
Chris@82 202 T4 = T2 + T3;
Chris@82 203 Ty = T2 - T3;
Chris@82 204 T5 = cr[WS(rs, 2)];
Chris@82 205 T6 = ci[WS(rs, 1)];
Chris@82 206 T7 = T5 + T6;
Chris@82 207 Tz = T5 - T6;
Chris@82 208 }
Chris@82 209 Tp = KP559016994 * (T4 - T7);
Chris@82 210 TK = FMA(KP951056516, Ty, KP587785252 * Tz);
Chris@82 211 TA = FNMS(KP951056516, Tz, KP587785252 * Ty);
Chris@82 212 T8 = T4 + T7;
Chris@82 213 To = FNMS(KP250000000, T8, T1);
Chris@82 214 }
/* Second input group: ci[4] plus differences/sums of (ci[3], cr[4]) and (ci[2], cr[3]). */
Chris@82 215 {
Chris@82 216 E Tc, Tr, Tf, Ts;
Chris@82 217 T9 = ci[WS(rs, 4)];
Chris@82 218 {
Chris@82 219 E Ta, Tb, Td, Te;
Chris@82 220 Ta = ci[WS(rs, 3)];
Chris@82 221 Tb = cr[WS(rs, 4)];
Chris@82 222 Tc = Ta - Tb;
Chris@82 223 Tr = Ta + Tb;
Chris@82 224 Td = ci[WS(rs, 2)];
Chris@82 225 Te = cr[WS(rs, 3)];
Chris@82 226 Tf = Td - Te;
Chris@82 227 Ts = Td + Te;
Chris@82 228 }
Chris@82 229 Tt = FNMS(KP951056516, Ts, KP587785252 * Tr);
Chris@82 230 TI = FMA(KP951056516, Tr, KP587785252 * Ts);
Chris@82 231 TC = KP559016994 * (Tc - Tf);
Chris@82 232 Tg = Tc + Tf;
Chris@82 233 TB = FNMS(KP250000000, Tg, T9);
Chris@82 234 }
/* Slot 0 is stored as plain sums, with no twiddle multiplication. */
Chris@82 235 cr[0] = T1 + T8;
Chris@82 236 ci[0] = T9 + Tg;
/* Slots 2 and 3: the pair (Tu, TE) is scaled by (Tn, Tx), the pair (TF, TG) by (Ti, Tl). */
Chris@82 237 {
Chris@82 238 E Tu, TF, TE, TG, Tq, TD;
Chris@82 239 Tq = To - Tp;
Chris@82 240 Tu = Tq - Tt;
Chris@82 241 TF = Tq + Tt;
Chris@82 242 TD = TB - TC;
Chris@82 243 TE = TA + TD;
Chris@82 244 TG = TD - TA;
Chris@82 245 cr[WS(rs, 2)] = FNMS(Tx, TE, Tn * Tu);
Chris@82 246 ci[WS(rs, 2)] = FMA(Tn, TE, Tx * Tu);
Chris@82 247 cr[WS(rs, 3)] = FNMS(Tl, TG, Ti * TF);
Chris@82 248 ci[WS(rs, 3)] = FMA(Ti, TG, Tl * TF);
Chris@82 249 }
/* Slots 1 and 4: the pair (TJ, TM) is scaled by (Th, Tk), the pair (TO, TQ) by (TN, TP). */
Chris@82 250 {
Chris@82 251 E TJ, TO, TM, TQ, TH, TL;
Chris@82 252 TH = Tp + To;
Chris@82 253 TJ = TH - TI;
Chris@82 254 TO = TH + TI;
Chris@82 255 TL = TC + TB;
Chris@82 256 TM = TK + TL;
Chris@82 257 TQ = TL - TK;
Chris@82 258 cr[WS(rs, 1)] = FNMS(Tk, TM, Th * TJ);
Chris@82 259 ci[WS(rs, 1)] = FMA(Th, TM, Tk * TJ);
Chris@82 260 cr[WS(rs, 4)] = FNMS(TP, TQ, TN * TO);
Chris@82 261 ci[WS(rs, 4)] = FMA(TN, TQ, TP * TO);
Chris@82 262 }
Chris@82 263 }
Chris@82 264 }
Chris@82 265 }
Chris@82 266 }
Chris@82 267
/*
 * Twiddle instruction list for hb2_5: two TW_CEXP entries followed by a
 * TW_NEXT entry that ends the list.
 * NOTE(review): presumably the last field (1 and 3) selects which twiddle
 * powers are stored, giving the four R values hb2_5 reads from W per
 * iteration -- confirm against the tw_instr definition.
 */
Chris@82 268 static const tw_instr twinstr[] = {
Chris@82 269 {TW_CEXP, 1, 1},
Chris@82 270 {TW_CEXP, 1, 3},
Chris@82 271 {TW_NEXT, 1, 0}
Chris@82 272 };
Chris@82 273
/*
 * Codelet descriptor for the non-FMA variant: the value 5, the name "hb2_5",
 * the twiddle instruction list, the genus, and the counts {30, 18, 14, 0},
 * which match the "30 additions, 18 multiplications, 14 fused multiply/add"
 * figures in this variant's generator comment.
 * NOTE(review): the leading 5 is presumably the transform size (-n 5) -- confirm
 * against the hc2hc_desc definition.
 */
Chris@82 274 static const hc2hc_desc desc = { 5, "hb2_5", twinstr, &GENUS, {30, 18, 14, 0} };
Chris@82 275
/*
 * Registration entry point: hands the hb2_5 kernel and its descriptor to
 * X(khc2hc_register) for the given planner p.
 */
Chris@82 276 void X(codelet_hb2_5) (planner *p) {
Chris@82 277 X(khc2hc_register) (p, hb2_5, &desc);
Chris@82 278 }
Chris@82 279 #endif