annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:05 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 14 -name r2cf_14 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 62 FP additions, 36 FP multiplications,
Chris@42 32 * (or, 32 additions, 6 multiplications, 30 fused multiply/add),
Chris@42 33 * 45 stack variables, 6 constants, and 28 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_14, HAVE_FMA variant (machine-generated; see the generator command
 * line in the comment preceding this function: "-n 14 -name r2cf_14").
 *
 * For each of v iterations the kernel reads 14 inputs -- R0[0],
 * R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..6)] -- and writes 14 outputs --
 * Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..6)].  After each iteration
 * R0 and R1 advance by ivs, and Cr and Ci advance by ovs.
 *
 * Parameters:
 *   R0, R1       input arrays, indexed through WS(rs, k)
 *   Cr, Ci       output arrays, indexed through WS(csr, k) / WS(csi, k)
 *   rs, csr, csi strides for the inputs, Cr and Ci respectively
 *   v            number of iterations of the loop
 *   ivs, ovs     per-iteration pointer increments for inputs / outputs
 *
 * NOTE(review): presumably R0/R1 hold the even/odd-indexed real samples
 * and Cr/Ci the real/imaginary halves of a size-14 forward real-to-complex
 * DFT -- inferred from the name and generator flags; confirm against r2cf.h.
 * DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are project macros that are not
 * defined in this file.
 */
Chris@42 37 static void r2cf_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Named numeric constants used by the arithmetic below.
 * NOTE(review): 0.900968867... and 0.974927912... match cos(pi/7) and
 * sin(3*pi/7); the meaning of the other four values is not evident from
 * this file. */
Chris@42 39 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@42 40 DK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@42 41 DK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@42 42 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@42 43 DK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@42 44 DK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@42 45 {
Chris@42 46 INT i;
/* Counts i down from v to 1; the increment clause also steps all four
 * data pointers and applies MAKE_VOLATILE_STRIDE to each stride. */
Chris@42 47 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) {
Chris@42 48 E TN, T3, TG, TQ, Tx, To, TH, Td, TD, TO, Tw, Ta, TL, Ty, TT;
Chris@42 49 E TI, Tg, Tr, Te, Tf, TP, TJ;
/* Load stage: each R0 element is paired with one R1 element and both the
 * sum and the difference of the pair are formed.  The loads are interleaved
 * with the arithmetic by the generator's scheduler. */
Chris@42 50 {
Chris@42 51 E Tl, TE, Tk, Tm;
Chris@42 52 {
Chris@42 53 E T1, T2, Ti, Tj;
Chris@42 54 T1 = R0[0];
Chris@42 55 T2 = R1[WS(rs, 3)];
Chris@42 56 Ti = R0[WS(rs, 3)];
Chris@42 57 Tj = R1[WS(rs, 6)];
Chris@42 58 Tl = R0[WS(rs, 4)];
/* TN / T3: sum / difference of the pair (R0[0], R1[3]). */
Chris@42 59 TN = T1 + T2;
Chris@42 60 T3 = T1 - T2;
/* TE / Tk: sum / difference of the pair (R0[3], R1[6]). */
Chris@42 61 TE = Ti + Tj;
Chris@42 62 Tk = Ti - Tj;
Chris@42 63 Tm = R1[0];
Chris@42 64 }
Chris@42 65 {
Chris@42 66 E T7, TC, T6, T8;
Chris@42 67 {
Chris@42 68 E T4, T5, TF, Tn;
Chris@42 69 T4 = R0[WS(rs, 1)];
Chris@42 70 T5 = R1[WS(rs, 4)];
Chris@42 71 T7 = R0[WS(rs, 6)];
/* TF / Tn: sum / difference of the pair (R0[4], R1[0]). */
Chris@42 72 TF = Tl + Tm;
Chris@42 73 Tn = Tl - Tm;
/* TC / T6: sum / difference of the pair (R0[1], R1[4]). */
Chris@42 74 TC = T4 + T5;
Chris@42 75 T6 = T4 - T5;
Chris@42 76 TG = TE - TF;
Chris@42 77 TQ = TE + TF;
Chris@42 78 Tx = Tn - Tk;
Chris@42 79 To = Tk + Tn;
Chris@42 80 T8 = R1[WS(rs, 2)];
Chris@42 81 }
Chris@42 82 {
Chris@42 83 E Tb, Tc, TB, T9;
Chris@42 84 Tb = R0[WS(rs, 2)];
Chris@42 85 Tc = R1[WS(rs, 5)];
Chris@42 86 Te = R0[WS(rs, 5)];
/* TB / T9: sum / difference of the pair (R0[6], R1[2]). */
Chris@42 87 TB = T7 + T8;
Chris@42 88 T9 = T7 - T8;
/* TH / Td: sum / difference of the pair (R0[2], R1[5]). */
Chris@42 89 TH = Tb + Tc;
Chris@42 90 Td = Tb - Tc;
Chris@42 91 TD = TB - TC;
Chris@42 92 TO = TC + TB;
Chris@42 93 Tw = T6 - T9;
Chris@42 94 Ta = T6 + T9;
Chris@42 95 Tf = R1[WS(rs, 1)];
Chris@42 96 }
Chris@42 97 }
Chris@42 98 }
/* Combination stage.  Values built from the pair sums (TN, TO, TP, TQ,
 * TD, TG, TJ) feed the even-indexed outputs Cr[0,2,4,6] and Ci[2,4,6];
 * values built from the pair differences (T3, Ta, Th, To, Tv, Tw, Tx)
 * feed the odd-indexed outputs Cr[1,3,5,7] and Ci[1,3,5]. */
Chris@42 99 TL = FNMS(KP554958132, TG, TD);
Chris@42 100 Ty = FNMS(KP554958132, Tx, Tw);
Chris@42 101 TT = FNMS(KP356895867, TO, TQ);
/* TI / Tg: sum / difference of the last pair (R0[5], R1[1]). */
Chris@42 102 TI = Te + Tf;
Chris@42 103 Tg = Te - Tf;
Chris@42 104 Tr = FNMS(KP356895867, Ta, To);
Chris@42 105 TP = TH + TI;
Chris@42 106 TJ = TH - TI;
Chris@42 107 {
Chris@42 108 E Th, Tv, TK, TM;
Chris@42 109 Th = Td + Tg;
Chris@42 110 Tv = Tg - Td;
Chris@42 111 TK = FMA(KP554958132, TJ, TG);
Chris@42 112 TM = FMA(KP554958132, TD, TJ);
Chris@42 113 Ci[WS(csi, 6)] = KP974927912 * (FNMS(KP801937735, TL, TJ));
Chris@42 114 {
Chris@42 115 E TR, TV, TU, Tz;
Chris@42 116 TR = FNMS(KP356895867, TQ, TP);
Chris@42 117 TV = FNMS(KP356895867, TP, TO);
Chris@42 118 TU = FNMS(KP692021471, TT, TP);
/* Cr[0] is the plain sum of all seven pair sums. */
Chris@42 119 Cr[0] = TN + TO + TP + TQ;
Chris@42 120 Tz = FMA(KP554958132, Tv, Tx);
Chris@42 121 Ci[WS(csi, 1)] = KP974927912 * (FNMS(KP801937735, Ty, Tv));
Chris@42 122 {
Chris@42 123 E TA, Ts, Tt, Tp;
Chris@42 124 TA = FMA(KP554958132, Tw, Tv);
Chris@42 125 Ts = FNMS(KP692021471, Tr, Th);
Chris@42 126 Tt = FNMS(KP356895867, Th, Ta);
Chris@42 127 Tp = FNMS(KP356895867, To, Th);
/* Cr[7] is the plain sum of all seven pair differences. */
Chris@42 128 Cr[WS(csr, 7)] = T3 + Ta + Th + To;
Chris@42 129 Ci[WS(csi, 2)] = KP974927912 * (FMA(KP801937735, TK, TD));
Chris@42 130 Ci[WS(csi, 4)] = KP974927912 * (FNMS(KP801937735, TM, TG));
Chris@42 131 {
Chris@42 132 E TS, TW, Tu, Tq;
Chris@42 133 TS = FNMS(KP692021471, TR, TO);
Chris@42 134 TW = FNMS(KP692021471, TV, TQ);
Chris@42 135 Cr[WS(csr, 2)] = FNMS(KP900968867, TU, TN);
Chris@42 136 Ci[WS(csi, 5)] = KP974927912 * (FMA(KP801937735, Tz, Tw));
Chris@42 137 Ci[WS(csi, 3)] = KP974927912 * (FNMS(KP801937735, TA, Tx));
Chris@42 138 Cr[WS(csr, 5)] = FNMS(KP900968867, Ts, T3);
Chris@42 139 Tu = FNMS(KP692021471, Tt, To);
Chris@42 140 Tq = FNMS(KP692021471, Tp, Ta);
Chris@42 141 Cr[WS(csr, 4)] = FNMS(KP900968867, TS, TN);
Chris@42 142 Cr[WS(csr, 6)] = FNMS(KP900968867, TW, TN);
Chris@42 143 Cr[WS(csr, 1)] = FNMS(KP900968867, Tu, T3);
Chris@42 144 Cr[WS(csr, 3)] = FNMS(KP900968867, Tq, T3);
Chris@42 145 }
Chris@42 146 }
Chris@42 147 }
Chris@42 148 }
Chris@42 149 }
Chris@42 150 }
Chris@42 151 }
Chris@42 152
/* Descriptor handed to X(kr2c_register) together with the HAVE_FMA kernel.
 * It carries the value 14, the kernel name, and the counts {32, 6, 30, 0};
 * the first three match the "32 additions, 6 multiplications, 30 fused
 * multiply/add" figures in the comment above the kernel.
 * NOTE(review): the kr2c_desc field layout and GENUS are declared in
 * project headers not visible here -- confirm field meanings there. */
Chris@42 153 static const kr2c_desc desc = { 14, "r2cf_14", {32, 6, 30, 0}, &GENUS };
Chris@42 154
/* Registration entry point for the HAVE_FMA build: passes the planner p,
 * the r2cf_14 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a project macro (presumably a name-prefixing
 * wrapper); its expansion is not visible in this file. */
Chris@42 155 void X(codelet_r2cf_14) (planner *p) {
Chris@42 156 X(kr2c_register) (p, r2cf_14, &desc);
Chris@42 157 }
Chris@42 158
Chris@42 159 #else /* HAVE_FMA */
Chris@42 160
Chris@42 161 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 14 -name r2cf_14 -include r2cf.h */
Chris@42 162
Chris@42 163 /*
Chris@42 164 * This function contains 62 FP additions, 36 FP multiplications,
Chris@42 165 * (or, 38 additions, 12 multiplications, 24 fused multiply/add),
Chris@42 166 * 29 stack variables, 6 constants, and 28 memory accesses
Chris@42 167 */
Chris@42 168 #include "r2cf.h"
Chris@42 169
/*
 * r2cf_14, variant compiled when HAVE_FMA is not defined (machine-generated;
 * see the generator command line in the comment preceding this function).
 *
 * Same interface and memory footprint as the HAVE_FMA variant: for each of
 * v iterations it reads R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..6)]
 * and writes Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..6)], then advances
 * R0/R1 by ivs and Cr/Ci by ovs.
 *
 * Parameters:
 *   R0, R1       input arrays, indexed through WS(rs, k)
 *   Cr, Ci       output arrays, indexed through WS(csr, k) / WS(csi, k)
 *   rs, csr, csi strides for the inputs, Cr and Ci respectively
 *   v            number of iterations of the loop
 *   ivs, ovs     per-iteration pointer increments for inputs / outputs
 *
 * NOTE(review): presumably a size-14 forward real-to-complex DFT with
 * R0/R1 holding even/odd-indexed samples -- inferred from the name and
 * generator flags; confirm against r2cf.h.  DK, WS, FMA, FNMA, FNMS and
 * MAKE_VOLATILE_STRIDE are project macros that are not defined in this file.
 */
Chris@42 170 static void r2cf_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 171 {
/* Named numeric constants used by the arithmetic below.
 * NOTE(review): the values match cos(pi/7), cos(3*pi/7), cos(2*pi/7),
 * sin(pi/7), sin(3*pi/7) and sin(2*pi/7) respectively, in declaration
 * order. */
Chris@42 172 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@42 173 DK(KP222520933, +0.222520933956314404288902564496794759466355569);
Chris@42 174 DK(KP623489801, +0.623489801858733530525004884004239810632274731);
Chris@42 175 DK(KP433883739, +0.433883739117558120475768332848358754609990728);
Chris@42 176 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@42 177 DK(KP781831482, +0.781831482468029808708444526674057750232334519);
Chris@42 178 {
Chris@42 179 INT i;
/* Counts i down from v to 1; the increment clause also steps all four
 * data pointers and applies MAKE_VOLATILE_STRIDE to each stride. */
Chris@42 180 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) {
Chris@42 181 E T3, TB, T6, Tv, Tn, Ts, Tk, Tt, Td, Ty, T9, Tw, Tg, Tz, T1;
Chris@42 182 E T2;
/* Load stage: each R0 element is paired with one R1 element; the
 * difference (T3, T6, Tn, Tk, Td, T9, Tg) and the sum (TB, Tv, Ts, Tt,
 * Ty, Tw, Tz) of every pair are kept for the two output stages below. */
Chris@42 183 T1 = R0[0];
Chris@42 184 T2 = R1[WS(rs, 3)];
Chris@42 185 T3 = T1 - T2;
Chris@42 186 TB = T1 + T2;
Chris@42 187 {
Chris@42 188 E T4, T5, Tl, Tm;
Chris@42 189 T4 = R0[WS(rs, 2)];
Chris@42 190 T5 = R1[WS(rs, 5)];
Chris@42 191 T6 = T4 - T5;
Chris@42 192 Tv = T4 + T5;
Chris@42 193 Tl = R0[WS(rs, 6)];
Chris@42 194 Tm = R1[WS(rs, 2)];
Chris@42 195 Tn = Tl - Tm;
Chris@42 196 Ts = Tl + Tm;
Chris@42 197 }
Chris@42 198 {
Chris@42 199 E Ti, Tj, Tb, Tc;
Chris@42 200 Ti = R0[WS(rs, 1)];
Chris@42 201 Tj = R1[WS(rs, 4)];
Chris@42 202 Tk = Ti - Tj;
Chris@42 203 Tt = Ti + Tj;
Chris@42 204 Tb = R0[WS(rs, 3)];
Chris@42 205 Tc = R1[WS(rs, 6)];
Chris@42 206 Td = Tb - Tc;
Chris@42 207 Ty = Tb + Tc;
Chris@42 208 }
Chris@42 209 {
Chris@42 210 E T7, T8, Te, Tf;
Chris@42 211 T7 = R0[WS(rs, 5)];
Chris@42 212 T8 = R1[WS(rs, 1)];
Chris@42 213 T9 = T7 - T8;
Chris@42 214 Tw = T7 + T8;
Chris@42 215 Te = R0[WS(rs, 4)];
Chris@42 216 Tf = R1[0];
Chris@42 217 Tg = Te - Tf;
Chris@42 218 Tz = Te + Tf;
Chris@42 219 }
/* Odd-indexed outputs: Ci[1], Ci[3], Ci[5] and Cr[1], Cr[3], Cr[5], Cr[7]
 * are computed only from the pair differences. */
Chris@42 220 {
Chris@42 221 E Tp, Tr, Tq, Ta, To, Th;
Chris@42 222 Tp = Tn - Tk;
Chris@42 223 Tr = Tg - Td;
Chris@42 224 Tq = T9 - T6;
Chris@42 225 Ci[WS(csi, 1)] = FMA(KP781831482, Tp, KP974927912 * Tq) + (KP433883739 * Tr);
Chris@42 226 Ci[WS(csi, 5)] = FMA(KP433883739, Tq, KP781831482 * Tr) - (KP974927912 * Tp);
Chris@42 227 Ci[WS(csi, 3)] = FMA(KP433883739, Tp, KP974927912 * Tr) - (KP781831482 * Tq);
Chris@42 228 Ta = T6 + T9;
Chris@42 229 To = Tk + Tn;
Chris@42 230 Th = Td + Tg;
Chris@42 231 Cr[WS(csr, 3)] = FMA(KP623489801, Ta, T3) + FNMA(KP222520933, Th, KP900968867 * To);
/* Cr[7] is the plain sum of all seven pair differences. */
Chris@42 232 Cr[WS(csr, 7)] = T3 + To + Ta + Th;
Chris@42 233 Cr[WS(csr, 1)] = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta);
Chris@42 234 Cr[WS(csr, 5)] = FMA(KP623489801, Th, T3) + FNMA(KP900968867, Ta, KP222520933 * To);
Chris@42 235 }
/* Even-indexed outputs: Ci[2], Ci[4], Ci[6] and Cr[0], Cr[2], Cr[4], Cr[6]
 * are computed only from the pair sums. */
Chris@42 236 {
Chris@42 237 E Tu, TA, Tx, TC, TE, TD;
Chris@42 238 Tu = Ts - Tt;
Chris@42 239 TA = Ty - Tz;
Chris@42 240 Tx = Tv - Tw;
Chris@42 241 Ci[WS(csi, 2)] = FMA(KP974927912, Tu, KP433883739 * Tx) + (KP781831482 * TA);
Chris@42 242 Ci[WS(csi, 6)] = FMA(KP974927912, Tx, KP433883739 * TA) - (KP781831482 * Tu);
Chris@42 243 Ci[WS(csi, 4)] = FNMS(KP781831482, Tx, KP974927912 * TA) - (KP433883739 * Tu);
Chris@42 244 TC = Tt + Ts;
Chris@42 245 TE = Tv + Tw;
Chris@42 246 TD = Ty + Tz;
Chris@42 247 Cr[WS(csr, 6)] = FMA(KP623489801, TC, TB) + FNMA(KP900968867, TD, KP222520933 * TE);
Chris@42 248 Cr[WS(csr, 2)] = FMA(KP623489801, TD, TB) + FNMA(KP900968867, TE, KP222520933 * TC);
Chris@42 249 Cr[WS(csr, 4)] = FMA(KP623489801, TE, TB) + FNMA(KP222520933, TD, KP900968867 * TC);
/* Cr[0] is the plain sum of all seven pair sums. */
Chris@42 250 Cr[0] = TB + TC + TE + TD;
Chris@42 251 }
Chris@42 252 }
Chris@42 253 }
Chris@42 254 }
Chris@42 255
/* Descriptor handed to X(kr2c_register) together with the non-FMA kernel.
 * It carries the value 14, the kernel name, and the counts {38, 12, 24, 0};
 * the first three match the "38 additions, 12 multiplications, 24 fused
 * multiply/add" figures in the comment above the kernel.
 * NOTE(review): the kr2c_desc field layout and GENUS are declared in
 * project headers not visible here -- confirm field meanings there. */
Chris@42 256 static const kr2c_desc desc = { 14, "r2cf_14", {38, 12, 24, 0}, &GENUS };
Chris@42 257
/* Registration entry point for the non-FMA build: passes the planner p,
 * the r2cf_14 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a project macro (presumably a name-prefixing
 * wrapper); its expansion is not visible in this file. */
Chris@42 258 void X(codelet_r2cf_14) (planner *p) {
Chris@42 259 X(kr2c_register) (p, r2cf_14, &desc);
Chris@42 260 }
Chris@42 261
Chris@42 262 #endif /* HAVE_FMA */