annotate src/fftw-3.3.5/rdft/scalar/r2cf/hf2_5.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:47:00 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -dit -name hf2_5 -include hf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 44 FP additions, 40 FP multiplications,
Chris@42 32 * (or, 14 additions, 10 multiplications, 30 fused multiply/add),
Chris@42 33 * 47 stack variables, 4 constants, and 20 memory accesses
Chris@42 34 */
Chris@42 35 #include "hf.h"
Chris@42 36
/*
 * hf2_5 (HAVE_FMA variant): genfft-generated size-5 twiddle codelet that
 * updates cr[] and ci[] in place.
 *
 * Parameters, as used by the code below:
 *   cr, ci - data arrays; each iteration reads and then overwrites cr[0],
 *            ci[0] and the elements at WS(rs, k) for k = 1..4 of both
 *            arrays (10 locations per iteration).
 *   W      - twiddle table; rebased to W + (mb - 1) * 4 before the first
 *            iteration, after which 4 values are consumed per iteration.
 *   rs     - stride handed to WS() to locate the five elements.
 *   mb, me - half-open iteration range [mb, me).
 *   ms     - per-iteration step: cr advances by ms, ci retreats by ms.
 *
 * NOTE(review): FMA, FNMS, FMS, DK, E, R, INT, WS and MAKE_VOLATILE_STRIDE
 * are not defined in this file; presumably they come from codelet-rdft.h
 * or hf.h.  The comments below assume FMA(a,b,c) = a*b + c,
 * FMS(a,b,c) = a*b - c and FNMS(a,b,c) = c - a*b -- confirm in the headers.
 */
Chris@42 37 static void hf2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
/* Constants: numerically sin(2 pi / 5), sqrt(5) / 4, 1 / 4 and (sqrt(5) - 1) / 2. */
Chris@42 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 43 {
Chris@42 44 INT m;
/* One pass per m; the pointer updates and MAKE_VOLATILE_STRIDE(10, rs) all run in the loop increment. */
Chris@42 45 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@42 46 E Ta, T1, TL, Tp, TT, Ti, TM, TC, To, TE, Ts, TF, T2, T8, T5;
Chris@42 47 E TS, Tt, TG;
/* Load the four twiddle values for this iteration. */
Chris@42 48 T2 = W[0];
Chris@42 49 Ta = W[3];
Chris@42 50 T8 = W[2];
Chris@42 51 T5 = W[1];
Chris@42 52 {
Chris@42 53 E Tq, Tr, Te, T9;
Chris@42 54 T1 = cr[0];
Chris@42 55 Te = T2 * Ta;
Chris@42 56 T9 = T2 * T8;
Chris@42 57 TL = ci[0];
Chris@42 58 {
Chris@42 59 E T3, Tf, Tm, Tj, Tb, T4, T6, Tc, Tg;
Chris@42 60 T3 = cr[WS(rs, 1)];
/* Tf, Tm, Tj, Tb combine the products T2*Ta and T2*T8 with T5*T8 and T5*Ta; */
/* they serve as the multipliers for the elements at WS(rs, 4) and WS(rs, 2). */
Chris@42 61 Tf = FMA(T5, T8, Te);
Chris@42 62 Tm = FNMS(T5, T8, Te);
Chris@42 63 Tj = FMA(T5, Ta, T9);
Chris@42 64 Tb = FNMS(T5, Ta, T9);
Chris@42 65 T4 = T2 * T3;
Chris@42 66 T6 = ci[WS(rs, 1)];
Chris@42 67 Tc = cr[WS(rs, 4)];
Chris@42 68 Tg = ci[WS(rs, 4)];
Chris@42 69 {
Chris@42 70 E Tk, Tl, Tn, TD;
Chris@42 71 {
Chris@42 72 E T7, Tz, Th, TB, Ty, Td, TA;
Chris@42 73 Tk = cr[WS(rs, 2)];
Chris@42 74 T7 = FMA(T5, T6, T4);
Chris@42 75 Ty = T2 * T6;
Chris@42 76 Td = Tb * Tc;
Chris@42 77 TA = Tb * Tg;
Chris@42 78 Tl = Tj * Tk;
Chris@42 79 Tz = FNMS(T5, T3, Ty);
Chris@42 80 Th = FMA(Tf, Tg, Td);
Chris@42 81 TB = FNMS(Tf, Tc, TA);
Chris@42 82 Tn = ci[WS(rs, 2)];
Chris@42 83 Tp = cr[WS(rs, 3)];
Chris@42 84 TT = Th - T7;
Chris@42 85 Ti = T7 + Th;
Chris@42 86 TM = Tz + TB;
Chris@42 87 TC = Tz - TB;
Chris@42 88 TD = Tj * Tn;
Chris@42 89 Tq = T8 * Tp;
Chris@42 90 Tr = ci[WS(rs, 3)];
Chris@42 91 }
Chris@42 92 To = FMA(Tm, Tn, Tl);
Chris@42 93 TE = FNMS(Tm, Tk, TD);
Chris@42 94 }
Chris@42 95 }
Chris@42 96 Ts = FMA(Ta, Tr, Tq);
Chris@42 97 TF = T8 * Tr;
Chris@42 98 }
Chris@42 99 TS = To - Ts;
Chris@42 100 Tt = To + Ts;
Chris@42 101 TG = FNMS(Ta, Tp, TF);
Chris@42 102 {
Chris@42 103 E TU, TW, TV, TR, Tw, Tu;
Chris@42 104 TU = FMA(KP618033988, TT, TS);
Chris@42 105 TW = FNMS(KP618033988, TS, TT);
Chris@42 106 Tw = Ti - Tt;
Chris@42 107 Tu = Ti + Tt;
Chris@42 108 {
Chris@42 109 E TN, TH, Tv, TI, TK;
Chris@42 110 TN = TE + TG;
Chris@42 111 TH = TE - TG;
/* First output: the original cr[0] plus the sum Tu = Ti + Tt. */
Chris@42 112 cr[0] = T1 + Tu;
Chris@42 113 Tv = FNMS(KP250000000, Tu, T1);
Chris@42 114 TI = FMA(KP618033988, TH, TC);
Chris@42 115 TK = FNMS(KP618033988, TC, TH);
Chris@42 116 {
Chris@42 117 E TQ, TO, Tx, TJ, TP;
Chris@42 118 TQ = TM - TN;
Chris@42 119 TO = TM + TN;
Chris@42 120 Tx = FMA(KP559016994, Tw, Tv);
Chris@42 121 TJ = FNMS(KP559016994, Tw, Tv);
/* ci[WS(rs, 4)] receives the original ci[0] plus the sum TO = TM + TN. */
Chris@42 122 ci[WS(rs, 4)] = TO + TL;
Chris@42 123 TP = FNMS(KP250000000, TO, TL);
Chris@42 124 ci[WS(rs, 1)] = FMA(KP951056516, TK, TJ);
Chris@42 125 cr[WS(rs, 2)] = FNMS(KP951056516, TK, TJ);
Chris@42 126 cr[WS(rs, 1)] = FMA(KP951056516, TI, Tx);
Chris@42 127 ci[0] = FNMS(KP951056516, TI, Tx);
Chris@42 128 TV = FMA(KP559016994, TQ, TP);
Chris@42 129 TR = FNMS(KP559016994, TQ, TP);
Chris@42 130 }
Chris@42 131 }
/* The last four outputs pair TU with TR and TW with TV. */
Chris@42 132 ci[WS(rs, 2)] = FMA(KP951056516, TU, TR);
Chris@42 133 cr[WS(rs, 3)] = FMS(KP951056516, TU, TR);
Chris@42 134 ci[WS(rs, 3)] = FMA(KP951056516, TW, TV);
Chris@42 135 cr[WS(rs, 4)] = FMS(KP951056516, TW, TV);
Chris@42 136 }
Chris@42 137 }
Chris@42 138 }
Chris@42 139 }
Chris@42 140
/*
 * Twiddle instruction table referenced by the codelet descriptor: two
 * TW_CEXP entries (last fields 1 and 3) followed by a TW_NEXT entry.
 * NOTE(review): presumably this describes the four W values hf2_5 reads
 * per iteration -- confirm against the tw_instr definition.
 */
Chris@42 141 static const tw_instr twinstr[] = {
Chris@42 142 {TW_CEXP, 1, 1},
Chris@42 143 {TW_CEXP, 1, 3},
Chris@42 144 {TW_NEXT, 1, 0}
Chris@42 145 };
Chris@42 146
/*
 * Codelet descriptor for the HAVE_FMA build: leading field 5 (presumably
 * the transform size -- confirm against hc2hc_desc), the name "hf2_5",
 * the twiddle table, &GENUS, and the counts {14, 10, 30, 0}, which match
 * the "14 additions, 10 multiplications, 30 fused multiply/add" stated in
 * this branch's generator comment.
 */
Chris@42 147 static const hc2hc_desc desc = { 5, "hf2_5", twinstr, &GENUS, {14, 10, 30, 0} };
Chris@42 148
/* Registers hf2_5 and its descriptor with planner p via X(khc2hc_register). */
Chris@42 149 void X(codelet_hf2_5) (planner *p) {
Chris@42 150 X(khc2hc_register) (p, hf2_5, &desc);
Chris@42 151 }
Chris@42 152 #else /* HAVE_FMA */
Chris@42 153
Chris@42 154 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -dit -name hf2_5 -include hf.h */
Chris@42 155
Chris@42 156 /*
Chris@42 157 * This function contains 44 FP additions, 32 FP multiplications,
Chris@42 158 * (or, 30 additions, 18 multiplications, 14 fused multiply/add),
Chris@42 159 * 37 stack variables, 4 constants, and 20 memory accesses
Chris@42 160 */
Chris@42 161 #include "hf.h"
Chris@42 162
/*
 * hf2_5 (non-HAVE_FMA variant): genfft-generated size-5 twiddle codelet
 * that updates cr[] and ci[] in place.
 *
 * Parameters, as used by the code below:
 *   cr, ci - data arrays; each iteration reads and then overwrites cr[0],
 *            ci[0] and the elements at WS(rs, k) for k = 1..4 of both
 *            arrays (10 locations per iteration).
 *   W      - twiddle table; rebased to W + (mb - 1) * 4 before the first
 *            iteration, after which 4 values are consumed per iteration.
 *   rs     - stride handed to WS() to locate the five elements.
 *   mb, me - half-open iteration range [mb, me).
 *   ms     - per-iteration step: cr advances by ms, ci retreats by ms.
 *
 * NOTE(review): FMA, FNMS, DK, E, R, INT, WS and MAKE_VOLATILE_STRIDE are
 * not defined in this file; presumably they come from codelet-rdft.h or
 * hf.h.  The comments below assume FMA(a,b,c) = a*b + c and
 * FNMS(a,b,c) = c - a*b -- confirm in the headers.
 */
Chris@42 163 static void hf2_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 164 {
/* Constants: numerically 1 / 4, sqrt(5) / 4, sin(pi / 5) and sin(2 pi / 5). */
Chris@42 165 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 166 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 167 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 168 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 169 {
Chris@42 170 INT m;
/* One pass per m; the pointer updates and MAKE_VOLATILE_STRIDE(10, rs) all run in the loop increment. */
Chris@42 171 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
Chris@42 172 E T2, T4, T7, T9, Tb, Tl, Tf, Tj;
Chris@42 173 {
Chris@42 174 E T8, Te, Ta, Td;
/* Load the four twiddle values, then derive Tb, Tl, Tf, Tj from their pairwise products. */
Chris@42 175 T2 = W[0];
Chris@42 176 T4 = W[1];
Chris@42 177 T7 = W[2];
Chris@42 178 T9 = W[3];
Chris@42 179 T8 = T2 * T7;
Chris@42 180 Te = T4 * T7;
Chris@42 181 Ta = T4 * T9;
Chris@42 182 Td = T2 * T9;
Chris@42 183 Tb = T8 - Ta;
Chris@42 184 Tl = Td - Te;
Chris@42 185 Tf = Td + Te;
Chris@42 186 Tj = T8 + Ta;
Chris@42 187 }
Chris@42 188 {
Chris@42 189 E T1, TI, Ty, TB, TG, TF, TJ, TK, TL, Ti, Tr, Ts;
Chris@42 190 T1 = cr[0];
Chris@42 191 TI = ci[0];
Chris@42 192 {
Chris@42 193 E T6, Tw, Tq, TA, Th, Tx, Tn, Tz;
Chris@42 194 {
Chris@42 195 E T3, T5, To, Tp;
/* Elements 1 and 3 are multiplied by the loaded pairs (T2, T4) and (T7, T9). */
Chris@42 196 T3 = cr[WS(rs, 1)];
Chris@42 197 T5 = ci[WS(rs, 1)];
Chris@42 198 T6 = FMA(T2, T3, T4 * T5);
Chris@42 199 Tw = FNMS(T4, T3, T2 * T5);
Chris@42 200 To = cr[WS(rs, 3)];
Chris@42 201 Tp = ci[WS(rs, 3)];
Chris@42 202 Tq = FMA(T7, To, T9 * Tp);
Chris@42 203 TA = FNMS(T9, To, T7 * Tp);
Chris@42 204 }
Chris@42 205 {
Chris@42 206 E Tc, Tg, Tk, Tm;
/* Elements 4 and 2 are multiplied by the derived pairs (Tb, Tf) and (Tj, Tl). */
Chris@42 207 Tc = cr[WS(rs, 4)];
Chris@42 208 Tg = ci[WS(rs, 4)];
Chris@42 209 Th = FMA(Tb, Tc, Tf * Tg);
Chris@42 210 Tx = FNMS(Tf, Tc, Tb * Tg);
Chris@42 211 Tk = cr[WS(rs, 2)];
Chris@42 212 Tm = ci[WS(rs, 2)];
Chris@42 213 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@42 214 Tz = FNMS(Tl, Tk, Tj * Tm);
Chris@42 215 }
Chris@42 216 Ty = Tw - Tx;
Chris@42 217 TB = Tz - TA;
Chris@42 218 TG = Tn - Tq;
Chris@42 219 TF = Th - T6;
Chris@42 220 TJ = Tw + Tx;
Chris@42 221 TK = Tz + TA;
Chris@42 222 TL = TJ + TK;
Chris@42 223 Ti = T6 + Th;
Chris@42 224 Tr = Tn + Tq;
Chris@42 225 Ts = Ti + Tr;
Chris@42 226 }
/* First output: the original cr[0] plus the sum Ts = Ti + Tr. */
Chris@42 227 cr[0] = T1 + Ts;
Chris@42 228 {
Chris@42 229 E TC, TE, Tv, TD, Tt, Tu;
Chris@42 230 TC = FMA(KP951056516, Ty, KP587785252 * TB);
Chris@42 231 TE = FNMS(KP587785252, Ty, KP951056516 * TB);
Chris@42 232 Tt = KP559016994 * (Ti - Tr);
Chris@42 233 Tu = FNMS(KP250000000, Ts, T1);
Chris@42 234 Tv = Tt + Tu;
Chris@42 235 TD = Tu - Tt;
Chris@42 236 ci[0] = Tv - TC;
Chris@42 237 ci[WS(rs, 1)] = TD + TE;
Chris@42 238 cr[WS(rs, 1)] = Tv + TC;
Chris@42 239 cr[WS(rs, 2)] = TD - TE;
Chris@42 240 }
/* ci[WS(rs, 4)] receives the original ci[0] plus the sum TL = TJ + TK. */
Chris@42 241 ci[WS(rs, 4)] = TL + TI;
Chris@42 242 {
Chris@42 243 E TH, TP, TO, TQ, TM, TN;
Chris@42 244 TH = FMA(KP587785252, TF, KP951056516 * TG);
Chris@42 245 TP = FNMS(KP587785252, TG, KP951056516 * TF);
Chris@42 246 TM = FNMS(KP250000000, TL, TI);
Chris@42 247 TN = KP559016994 * (TJ - TK);
Chris@42 248 TO = TM - TN;
Chris@42 249 TQ = TN + TM;
Chris@42 250 cr[WS(rs, 3)] = TH - TO;
Chris@42 251 ci[WS(rs, 3)] = TP + TQ;
Chris@42 252 ci[WS(rs, 2)] = TH + TO;
Chris@42 253 cr[WS(rs, 4)] = TP - TQ;
Chris@42 254 }
Chris@42 255 }
Chris@42 256 }
Chris@42 257 }
Chris@42 258 }
Chris@42 259
/*
 * Twiddle instruction table referenced by the codelet descriptor: two
 * TW_CEXP entries (last fields 1 and 3) followed by a TW_NEXT entry.
 * NOTE(review): presumably this describes the four W values hf2_5 reads
 * per iteration -- confirm against the tw_instr definition.
 */
Chris@42 260 static const tw_instr twinstr[] = {
Chris@42 261 {TW_CEXP, 1, 1},
Chris@42 262 {TW_CEXP, 1, 3},
Chris@42 263 {TW_NEXT, 1, 0}
Chris@42 264 };
Chris@42 265
/*
 * Codelet descriptor for the non-FMA build: leading field 5 (presumably
 * the transform size -- confirm against hc2hc_desc), the name "hf2_5",
 * the twiddle table, &GENUS, and the counts {30, 18, 14, 0}, which match
 * the "30 additions, 18 multiplications, 14 fused multiply/add" stated in
 * this branch's generator comment.
 */
Chris@42 266 static const hc2hc_desc desc = { 5, "hf2_5", twinstr, &GENUS, {30, 18, 14, 0} };
Chris@42 267
/* Registers hf2_5 and its descriptor with planner p via X(khc2hc_register). */
Chris@42 268 void X(codelet_hf2_5) (planner *p) {
Chris@42 269 X(khc2hc_register) (p, hf2_5, &desc);
Chris@42 270 }
Chris@42 271 #endif /* HAVE_FMA */