annotate src/fftw-3.3.8/rdft/scalar/r2cf/hf2_5.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:37 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -dit -name hf2_5 -include rdft/scalar/hf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 44 FP additions, 40 FP multiplications,
Chris@82 32 * (or, 14 additions, 10 multiplications, 30 fused multiply/add),
Chris@82 33 * 38 stack variables, 4 constants, and 20 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hf.h"
Chris@82 36
/*
 * hf2_5, FMA-preferring variant (selected by the enclosing #if on
 * ARCH_PREFERS_FMA / ISA_EXTENSION_PREFERS_FMA).
 *
 * Machine-generated size-5 codelet (genfft gen_hc2hc, -n 5 -dit
 * -twiddle-log3, per the generator command line recorded above).
 * Do not hand-edit the arithmetic; regenerate instead.
 *
 * cr, ci : arrays read and overwritten in place at offsets 0 and
 *          WS(rs, 1) .. WS(rs, 4) (ten loads and ten stores per iteration).
 * W      : read-only coefficient table; four values are consumed per
 *          iteration, starting at offset (mb - 1) * 4.
 * rs     : stride handed to WS() to locate the five elements.
 * mb, me : the loop runs for m = mb while m < me.
 * ms     : per-iteration step; cr advances by +ms and ci by -ms.
 *
 * NOTE(review): FMA, FNMS, FMS, DK, WS and MAKE_VOLATILE_STRIDE are macros
 * defined outside this file. The comments below assume FMA(a, b, c) = a*b + c,
 * FNMS(a, b, c) = c - a*b and FMS(a, b, c) = a*b - c; confirm against the
 * included headers.
 */
Chris@82 37 static void hf2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Constants; numerically sin(2*pi/5), sqrt(5)/4, (sqrt(5) - 1)/2 and 1/4. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 42 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 43 {
Chris@82 44 INT m;
/* One pass per m; cr walks forward, ci walks backward, W moves on by 4. */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@82 46 E T2, Ta, T8, T5, Tb, Tm, Tf, Tj, T9, Te;
/*
 * Load the four stored coefficients and derive four more from their
 * products, so that only W[0..3] has to be read from memory:
 *   Tb = W[0]*W[2] - W[1]*W[3]    Tf = W[0]*W[3] + W[1]*W[2]
 *   Tj = W[0]*W[2] + W[1]*W[3]    Tm = W[0]*W[3] - W[1]*W[2]
 * (under the macro semantics assumed in the header comment).
 */
Chris@82 47 T2 = W[0];
Chris@82 48 Ta = W[3];
Chris@82 49 T8 = W[2];
Chris@82 50 T9 = T2 * T8;
Chris@82 51 Te = T2 * Ta;
Chris@82 52 T5 = W[1];
Chris@82 53 Tb = FNMS(T5, Ta, T9);
Chris@82 54 Tm = FNMS(T5, T8, Te);
Chris@82 55 Tf = FMA(T5, T8, Te);
Chris@82 56 Tj = FMA(T5, Ta, T9);
Chris@82 57 {
Chris@82 58 E T1, TL, T7, Th, Ti, Tz, TB, TM, To, Ts, Tt, TE, TG, TN;
/* Element 0 is used as loaded; no coefficient is applied to it. */
Chris@82 59 T1 = cr[0];
Chris@82 60 TL = ci[0];
Chris@82 61 {
Chris@82 62 E T3, T4, T6, Ty, Tc, Td, Tg, TA;
/*
 * Elements 1 and 4: element 1 is combined with (W[0], W[1]) giving
 * (T7, Tz); element 4 is combined with the derived pair (Tb, Tf)
 * giving (Th, TB). Ti and TM are the sums of the two results.
 */
Chris@82 63 T3 = cr[WS(rs, 1)];
Chris@82 64 T4 = T2 * T3;
Chris@82 65 T6 = ci[WS(rs, 1)];
Chris@82 66 Ty = T2 * T6;
Chris@82 67 Tc = cr[WS(rs, 4)];
Chris@82 68 Td = Tb * Tc;
Chris@82 69 Tg = ci[WS(rs, 4)];
Chris@82 70 TA = Tb * Tg;
Chris@82 71 T7 = FMA(T5, T6, T4);
Chris@82 72 Th = FMA(Tf, Tg, Td);
Chris@82 73 Ti = T7 + Th;
Chris@82 74 Tz = FNMS(T5, T3, Ty);
Chris@82 75 TB = FNMS(Tf, Tc, TA);
Chris@82 76 TM = Tz + TB;
Chris@82 77 }
Chris@82 78 {
Chris@82 79 E Tk, Tl, Tn, TD, Tp, Tq, Tr, TF;
/*
 * Elements 2 and 3: element 2 is combined with the derived pair
 * (Tj, Tm) giving (To, TE); element 3 is combined with (W[2], W[3])
 * giving (Ts, TG). Tt and TN are the sums of the two results.
 */
Chris@82 80 Tk = cr[WS(rs, 2)];
Chris@82 81 Tl = Tj * Tk;
Chris@82 82 Tn = ci[WS(rs, 2)];
Chris@82 83 TD = Tj * Tn;
Chris@82 84 Tp = cr[WS(rs, 3)];
Chris@82 85 Tq = T8 * Tp;
Chris@82 86 Tr = ci[WS(rs, 3)];
Chris@82 87 TF = T8 * Tr;
Chris@82 88 To = FMA(Tm, Tn, Tl);
Chris@82 89 Ts = FMA(Ta, Tr, Tq);
Chris@82 90 Tt = To + Ts;
Chris@82 91 TE = FNMS(Tm, Tk, TD);
Chris@82 92 TG = FNMS(Ta, Tp, TF);
Chris@82 93 TN = TE + TG;
Chris@82 94 }
Chris@82 95 {
Chris@82 96 E Tw, Tu, Tv, TI, TK, TC, TH, Tx, TJ;
/*
 * First output group, built from T1 and the Ti/Tt sums plus the
 * Tz-TB / TE-TG differences. Writes cr[0], ci[0], cr[WS(rs, 1)],
 * cr[WS(rs, 2)] and ci[WS(rs, 1)].
 */
Chris@82 97 Tw = Ti - Tt;
Chris@82 98 Tu = Ti + Tt;
Chris@82 99 Tv = FNMS(KP250000000, Tu, T1);
Chris@82 100 TC = Tz - TB;
Chris@82 101 TH = TE - TG;
Chris@82 102 TI = FMA(KP618033988, TH, TC);
Chris@82 103 TK = FNMS(KP618033988, TC, TH);
Chris@82 104 cr[0] = T1 + Tu;
Chris@82 105 Tx = FMA(KP559016994, Tw, Tv);
Chris@82 106 ci[0] = FNMS(KP951056516, TI, Tx);
Chris@82 107 cr[WS(rs, 1)] = FMA(KP951056516, TI, Tx);
Chris@82 108 TJ = FNMS(KP559016994, Tw, Tv);
Chris@82 109 cr[WS(rs, 2)] = FNMS(KP951056516, TK, TJ);
Chris@82 110 ci[WS(rs, 1)] = FMA(KP951056516, TK, TJ);
Chris@82 111 }
Chris@82 112 {
Chris@82 113 E TQ, TO, TP, TU, TW, TS, TT, TV, TR;
/*
 * Second output group, built from TL and the TM/TN sums plus the
 * To-Ts / Th-T7 differences. Writes ci[WS(rs, 4)], cr[WS(rs, 4)],
 * ci[WS(rs, 3)], cr[WS(rs, 3)] and ci[WS(rs, 2)].
 */
Chris@82 114 TQ = TM - TN;
Chris@82 115 TO = TM + TN;
Chris@82 116 TP = FNMS(KP250000000, TO, TL);
Chris@82 117 TS = To - Ts;
Chris@82 118 TT = Th - T7;
Chris@82 119 TU = FMA(KP618033988, TT, TS);
Chris@82 120 TW = FNMS(KP618033988, TS, TT);
Chris@82 121 ci[WS(rs, 4)] = TO + TL;
Chris@82 122 TV = FMA(KP559016994, TQ, TP);
Chris@82 123 cr[WS(rs, 4)] = FMS(KP951056516, TW, TV);
Chris@82 124 ci[WS(rs, 3)] = FMA(KP951056516, TW, TV);
Chris@82 125 TR = FNMS(KP559016994, TQ, TP);
Chris@82 126 cr[WS(rs, 3)] = FMS(KP951056516, TU, TR);
Chris@82 127 ci[WS(rs, 2)] = FMA(KP951056516, TU, TR);
Chris@82 128 }
Chris@82 129 }
Chris@82 130 }
Chris@82 131 }
Chris@82 132 }
Chris@82 133
/*
 * Twiddle instruction table handed to the descriptor `desc`.
 * It lists two TW_CEXP entries (last field 1 and 3) followed by a TW_NEXT
 * entry. NOTE(review): the tw_instr field meanings are declared elsewhere;
 * presumably each TW_CEXP entry yields one (cos, sin)-style pair, matching
 * the four W values the kernel reads per iteration, and TW_NEXT ends the
 * list -- confirm against the tw_instr definition.
 */
Chris@82 134 static const tw_instr twinstr[] = {
Chris@82 135 {TW_CEXP, 1, 1},
Chris@82 136 {TW_CEXP, 1, 3},
Chris@82 137 {TW_NEXT, 1, 0}
Chris@82 138 };
Chris@82 139
/*
 * Descriptor for the FMA variant: size 5, name "hf2_5", the twiddle table,
 * the GENUS object from the included header, and the operation counts
 * {14, 10, 30, 0}, which match the "14 additions, 10 multiplications,
 * 30 fused multiply/add" figures in the generator comment for this variant.
 */
Chris@82 140 static const hc2hc_desc desc = { 5, "hf2_5", twinstr, &GENUS, {14, 10, 30, 0} };
Chris@82 141
/*
 * Registration entry point (name decorated by the X() macro): passes the
 * hf2_5 kernel and its descriptor to X(khc2hc_register) for planner p.
 */
Chris@82 142 void X(codelet_hf2_5) (planner *p) {
Chris@82 143 X(khc2hc_register) (p, hf2_5, &desc);
Chris@82 144 }
Chris@82 145 #else
Chris@82 146
Chris@82 147 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -dit -name hf2_5 -include rdft/scalar/hf.h */
Chris@82 148
Chris@82 149 /*
Chris@82 150 * This function contains 44 FP additions, 32 FP multiplications,
Chris@82 151 * (or, 30 additions, 18 multiplications, 14 fused multiply/add),
Chris@82 152 * 37 stack variables, 4 constants, and 20 memory accesses
Chris@82 153 */
Chris@82 154 #include "rdft/scalar/hf.h"
Chris@82 155
/*
 * hf2_5, non-FMA variant (the #else branch, used when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated size-5 codelet (genfft gen_hc2hc, -n 5 -dit
 * -twiddle-log3, per the generator command line recorded above).
 * Do not hand-edit the arithmetic; regenerate instead.
 *
 * cr, ci : arrays read and overwritten in place at offsets 0 and
 *          WS(rs, 1) .. WS(rs, 4) (ten loads and ten stores per iteration).
 * W      : read-only coefficient table; four values are consumed per
 *          iteration, starting at offset (mb - 1) * 4.
 * rs     : stride handed to WS() to locate the five elements.
 * mb, me : the loop runs for m = mb while m < me.
 * ms     : per-iteration step; cr advances by +ms and ci by -ms.
 *
 * NOTE(review): FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are macros
 * defined outside this file. The comments below assume FMA(a, b, c) = a*b + c
 * and FNMS(a, b, c) = c - a*b; confirm against the included headers.
 */
Chris@82 156 static void hf2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 157 {
/* Constants; numerically 1/4, sqrt(5)/4, sin(pi/5) and sin(2*pi/5). */
Chris@82 158 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 159 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 160 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 161 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 162 {
Chris@82 163 INT m;
/* One pass per m; cr walks forward, ci walks backward, W moves on by 4. */
Chris@82 164 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@82 165 E T2, T4, T7, T9, Tb, Tl, Tf, Tj;
Chris@82 166 {
Chris@82 167 E T8, Te, Ta, Td;
/*
 * Load W[0..3] and derive four further coefficients from their products:
 *   Tb = W[0]*W[2] - W[1]*W[3]    Tf = W[0]*W[3] + W[1]*W[2]
 *   Tj = W[0]*W[2] + W[1]*W[3]    Tl = W[0]*W[3] - W[1]*W[2]
 */
Chris@82 168 T2 = W[0];
Chris@82 169 T4 = W[1];
Chris@82 170 T7 = W[2];
Chris@82 171 T9 = W[3];
Chris@82 172 T8 = T2 * T7;
Chris@82 173 Te = T4 * T7;
Chris@82 174 Ta = T4 * T9;
Chris@82 175 Td = T2 * T9;
Chris@82 176 Tb = T8 - Ta;
Chris@82 177 Tl = Td - Te;
Chris@82 178 Tf = Td + Te;
Chris@82 179 Tj = T8 + Ta;
Chris@82 180 }
Chris@82 181 {
Chris@82 182 E T1, TI, Ty, TB, TG, TF, TJ, TK, TL, Ti, Tr, Ts;
/* Element 0 is used as loaded; no coefficient is applied to it. */
Chris@82 183 T1 = cr[0];
Chris@82 184 TI = ci[0];
Chris@82 185 {
Chris@82 186 E T6, Tw, Tq, TA, Th, Tx, Tn, Tz;
Chris@82 187 {
Chris@82 188 E T3, T5, To, Tp;
/*
 * Element 1 is combined with (W[0], W[1]) giving (T6, Tw);
 * element 3 is combined with (W[2], W[3]) giving (Tq, TA).
 */
Chris@82 189 T3 = cr[WS(rs, 1)];
Chris@82 190 T5 = ci[WS(rs, 1)];
Chris@82 191 T6 = FMA(T2, T3, T4 * T5);
Chris@82 192 Tw = FNMS(T4, T3, T2 * T5);
Chris@82 193 To = cr[WS(rs, 3)];
Chris@82 194 Tp = ci[WS(rs, 3)];
Chris@82 195 Tq = FMA(T7, To, T9 * Tp);
Chris@82 196 TA = FNMS(T9, To, T7 * Tp);
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E Tc, Tg, Tk, Tm;
/*
 * Element 4 is combined with the derived pair (Tb, Tf) giving (Th, Tx);
 * element 2 is combined with the derived pair (Tj, Tl) giving (Tn, Tz).
 */
Chris@82 200 Tc = cr[WS(rs, 4)];
Chris@82 201 Tg = ci[WS(rs, 4)];
Chris@82 202 Th = FMA(Tb, Tc, Tf * Tg);
Chris@82 203 Tx = FNMS(Tf, Tc, Tb * Tg);
Chris@82 204 Tk = cr[WS(rs, 2)];
Chris@82 205 Tm = ci[WS(rs, 2)];
Chris@82 206 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 207 Tz = FNMS(Tl, Tk, Tj * Tm);
Chris@82 208 }
/* Sums and differences of the four combined elements, shared by both output groups. */
Chris@82 209 Ty = Tw - Tx;
Chris@82 210 TB = Tz - TA;
Chris@82 211 TG = Tn - Tq;
Chris@82 212 TF = Th - T6;
Chris@82 213 TJ = Tw + Tx;
Chris@82 214 TK = Tz + TA;
Chris@82 215 TL = TJ + TK;
Chris@82 216 Ti = T6 + Th;
Chris@82 217 Tr = Tn + Tq;
Chris@82 218 Ts = Ti + Tr;
Chris@82 219 }
/* cr[0] receives element 0 plus the sum of all four combined first components. */
Chris@82 220 cr[0] = T1 + Ts;
Chris@82 221 {
Chris@82 222 E TC, TE, Tv, TD, Tt, Tu;
/*
 * First output group: writes ci[0], ci[WS(rs, 1)], cr[WS(rs, 1)]
 * and cr[WS(rs, 2)] from T1, Ts, Ti - Tr and the Ty/TB differences.
 */
Chris@82 223 TC = FMA(KP951056516, Ty, KP587785252 * TB);
Chris@82 224 TE = FNMS(KP587785252, Ty, KP951056516 * TB);
Chris@82 225 Tt = KP559016994 * (Ti - Tr);
Chris@82 226 Tu = FNMS(KP250000000, Ts, T1);
Chris@82 227 Tv = Tt + Tu;
Chris@82 228 TD = Tu - Tt;
Chris@82 229 ci[0] = Tv - TC;
Chris@82 230 ci[WS(rs, 1)] = TD + TE;
Chris@82 231 cr[WS(rs, 1)] = Tv + TC;
Chris@82 232 cr[WS(rs, 2)] = TD - TE;
Chris@82 233 }
/* ci[WS(rs, 4)] receives ci[0] plus the sum of all four combined second components. */
Chris@82 234 ci[WS(rs, 4)] = TL + TI;
Chris@82 235 {
Chris@82 236 E TH, TP, TO, TQ, TM, TN;
/*
 * Second output group: writes cr[WS(rs, 3)], ci[WS(rs, 3)],
 * ci[WS(rs, 2)] and cr[WS(rs, 4)] from TI, TL, TJ - TK and the
 * TF/TG differences.
 */
Chris@82 237 TH = FMA(KP587785252, TF, KP951056516 * TG);
Chris@82 238 TP = FNMS(KP587785252, TG, KP951056516 * TF);
Chris@82 239 TM = FNMS(KP250000000, TL, TI);
Chris@82 240 TN = KP559016994 * (TJ - TK);
Chris@82 241 TO = TM - TN;
Chris@82 242 TQ = TN + TM;
Chris@82 243 cr[WS(rs, 3)] = TH - TO;
Chris@82 244 ci[WS(rs, 3)] = TP + TQ;
Chris@82 245 ci[WS(rs, 2)] = TH + TO;
Chris@82 246 cr[WS(rs, 4)] = TP - TQ;
Chris@82 247 }
Chris@82 248 }
Chris@82 249 }
Chris@82 250 }
Chris@82 251 }
Chris@82 252
/*
 * Twiddle instruction table handed to the descriptor `desc`.
 * It lists two TW_CEXP entries (last field 1 and 3) followed by a TW_NEXT
 * entry. NOTE(review): the tw_instr field meanings are declared elsewhere;
 * presumably each TW_CEXP entry yields one (cos, sin)-style pair, matching
 * the four W values the kernel reads per iteration, and TW_NEXT ends the
 * list -- confirm against the tw_instr definition.
 */
Chris@82 253 static const tw_instr twinstr[] = {
Chris@82 254 {TW_CEXP, 1, 1},
Chris@82 255 {TW_CEXP, 1, 3},
Chris@82 256 {TW_NEXT, 1, 0}
Chris@82 257 };
Chris@82 258
/*
 * Descriptor for the non-FMA variant: size 5, name "hf2_5", the twiddle
 * table, the GENUS object from the included header, and the operation counts
 * {30, 18, 14, 0}, which match the "30 additions, 18 multiplications,
 * 14 fused multiply/add" figures in the generator comment for this variant.
 */
Chris@82 259 static const hc2hc_desc desc = { 5, "hf2_5", twinstr, &GENUS, {30, 18, 14, 0} };
Chris@82 260
/*
 * Registration entry point (name decorated by the X() macro): passes the
 * hf2_5 kernel and its descriptor to X(khc2hc_register) for planner p.
 */
Chris@82 261 void X(codelet_hf2_5) (planner *p) {
Chris@82 262 X(khc2hc_register) (p, hf2_5, &desc);
Chris@82 263 }
Chris@82 264 #endif