annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:26 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 14 -name r2cf_14 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 62 FP additions, 36 FP multiplications,
Chris@82 32 * (or, 32 additions, 6 multiplications, 30 fused multiply/add),
Chris@82 33 * 33 stack variables, 6 constants, and 28 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
Chris@82 37 static void r2cf_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* Variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined. Each loop pass reads 14 inputs (R0[0], R1[0] and offsets WS(rs, 1..6) of both R0 and R1) and writes 14 outputs (Cr[0], Cr[WS(csr, 1..7)], Ci[WS(csi, 1..6)]). Judging by the name and the descriptor this is presumably the size-14 real-to-complex forward DFT kernel, with R0/R1 holding even/odd-indexed samples -- TODO confirm against rdft/codelet-rdft.h. */
Chris@82 38 {
Chris@82 39 DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
Chris@82 40 DK(KP692021471, +0.692021471630095869627814897002069140197260599); /* numerically cos(2*pi/7) / cos(pi/7) */
Chris@82 41 DK(KP356895867, +0.356895867892209443894399510021300583399127187); /* numerically cos(3*pi/7) / cos(2*pi/7) */
Chris@82 42 DK(KP801937735, +0.801937735804838252472204639014890102331838324); /* numerically sin(2*pi/7) / sin(3*pi/7) */
Chris@82 43 DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
Chris@82 44 DK(KP554958132, +0.554958132087371191422194871006410481067288862); /* numerically sin(pi/7) / sin(2*pi/7) */
Chris@82 45 {
Chris@82 46 INT i;
Chris@82 47 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { /* v passes; after each pass R0/R1 advance by ivs and Cr/Ci by ovs. MAKE_VOLATILE_STRIDE is defined outside this file. */
Chris@82 48 E T3, TN, To, TQ, Tx, TG, Ta, TO, Tw, TD, Th, TP, Tv, TJ, T1;
Chris@82 49 E T2, TA, TK;
Chris@82 50 T1 = R0[0];
Chris@82 51 T2 = R1[WS(rs, 3)];
Chris@82 52 T3 = T1 - T2; /* difference of the first input pair */
Chris@82 53 TN = T1 + T2; /* sum of the first input pair */
Chris@82 54 { /* pairs (R0[3], R1[6]) and (R0[4], R1[0]) */
Chris@82 55 E Tk, TE, Tn, TF;
Chris@82 56 {
Chris@82 57 E Ti, Tj, Tl, Tm;
Chris@82 58 Ti = R0[WS(rs, 3)];
Chris@82 59 Tj = R1[WS(rs, 6)];
Chris@82 60 Tk = Ti - Tj;
Chris@82 61 TE = Ti + Tj;
Chris@82 62 Tl = R0[WS(rs, 4)];
Chris@82 63 Tm = R1[0];
Chris@82 64 Tn = Tl - Tm;
Chris@82 65 TF = Tl + Tm;
Chris@82 66 }
Chris@82 67 To = Tk + Tn; /* sum of the two pair differences */
Chris@82 68 TQ = TE + TF; /* sum of the two pair sums */
Chris@82 69 Tx = Tn - Tk; /* difference of the two pair differences */
Chris@82 70 TG = TE - TF; /* difference of the two pair sums */
Chris@82 71 }
Chris@82 72 { /* pairs (R0[1], R1[4]) and (R0[6], R1[2]); same pattern, note the operand order of Tw and TD */
Chris@82 73 E T6, TC, T9, TB;
Chris@82 74 {
Chris@82 75 E T4, T5, T7, T8;
Chris@82 76 T4 = R0[WS(rs, 1)];
Chris@82 77 T5 = R1[WS(rs, 4)];
Chris@82 78 T6 = T4 - T5;
Chris@82 79 TC = T4 + T5;
Chris@82 80 T7 = R0[WS(rs, 6)];
Chris@82 81 T8 = R1[WS(rs, 2)];
Chris@82 82 T9 = T7 - T8;
Chris@82 83 TB = T7 + T8;
Chris@82 84 }
Chris@82 85 Ta = T6 + T9;
Chris@82 86 TO = TC + TB;
Chris@82 87 Tw = T6 - T9;
Chris@82 88 TD = TB - TC;
Chris@82 89 }
Chris@82 90 { /* pairs (R0[2], R1[5]) and (R0[5], R1[1]) */
Chris@82 91 E Td, TH, Tg, TI;
Chris@82 92 {
Chris@82 93 E Tb, Tc, Te, Tf;
Chris@82 94 Tb = R0[WS(rs, 2)];
Chris@82 95 Tc = R1[WS(rs, 5)];
Chris@82 96 Td = Tb - Tc;
Chris@82 97 TH = Tb + Tc;
Chris@82 98 Te = R0[WS(rs, 5)];
Chris@82 99 Tf = R1[WS(rs, 1)];
Chris@82 100 Tg = Te - Tf;
Chris@82 101 TI = Te + Tf;
Chris@82 102 }
Chris@82 103 Th = Td + Tg;
Chris@82 104 TP = TH + TI;
Chris@82 105 Tv = Tg - Td;
Chris@82 106 TJ = TH - TI;
Chris@82 107 }
Chris@82 108 Cr[WS(csr, 7)] = T3 + Ta + Th + To; /* all seven pair differences: sum of the R0 inputs minus sum of the R1 inputs */
Chris@82 109 Cr[0] = TN + TO + TP + TQ; /* all seven pair sums: sum of all 14 inputs */
Chris@82 110 TA = FMA(KP554958132, Tw, Tv); /* Ci outputs 1, 3, 5 are built from Tv, Tw, Tx; outputs 2, 4, 6 from TD, TG, TJ. FMA/FNMS are macros defined outside this file (presumably a*b+c and c-a*b -- confirm). */
Chris@82 111 Ci[WS(csi, 3)] = KP974927912 * (FNMS(KP801937735, TA, Tx));
Chris@82 112 {
Chris@82 113 E TL, TM, Ty, Tz;
Chris@82 114 TL = FNMS(KP554958132, TG, TD);
Chris@82 115 Ci[WS(csi, 6)] = KP974927912 * (FNMS(KP801937735, TL, TJ));
Chris@82 116 TM = FMA(KP554958132, TD, TJ);
Chris@82 117 Ci[WS(csi, 4)] = KP974927912 * (FNMS(KP801937735, TM, TG));
Chris@82 118 Ty = FNMS(KP554958132, Tx, Tw);
Chris@82 119 Ci[WS(csi, 1)] = KP974927912 * (FNMS(KP801937735, Ty, Tv));
Chris@82 120 Tz = FMA(KP554958132, Tv, Tx);
Chris@82 121 Ci[WS(csi, 5)] = KP974927912 * (FMA(KP801937735, Tz, Tw));
Chris@82 122 }
Chris@82 123 TK = FMA(KP554958132, TJ, TG);
Chris@82 124 Ci[WS(csi, 2)] = KP974927912 * (FMA(KP801937735, TK, TD));
Chris@82 125 { /* remaining Cr outputs: three chained FNMS steps each; even indices use the pair sums TN/TO/TP/TQ, odd indices the pair differences T3/Ta/Th/To */
Chris@82 126 E TU, TT, Tq, Tp;
Chris@82 127 TT = FNMS(KP356895867, TO, TQ);
Chris@82 128 TU = FNMS(KP692021471, TT, TP);
Chris@82 129 Cr[WS(csr, 2)] = FNMS(KP900968867, TU, TN);
Chris@82 130 Tp = FNMS(KP356895867, To, Th);
Chris@82 131 Tq = FNMS(KP692021471, Tp, Ta);
Chris@82 132 Cr[WS(csr, 3)] = FNMS(KP900968867, Tq, T3);
Chris@82 133 }
Chris@82 134 {
Chris@82 135 E Tu, Tt, Ts, Tr;
Chris@82 136 Tt = FNMS(KP356895867, Th, Ta);
Chris@82 137 Tu = FNMS(KP692021471, Tt, To);
Chris@82 138 Cr[WS(csr, 1)] = FNMS(KP900968867, Tu, T3);
Chris@82 139 Tr = FNMS(KP356895867, Ta, To);
Chris@82 140 Ts = FNMS(KP692021471, Tr, Th);
Chris@82 141 Cr[WS(csr, 5)] = FNMS(KP900968867, Ts, T3);
Chris@82 142 }
Chris@82 143 {
Chris@82 144 E TW, TV, TS, TR;
Chris@82 145 TV = FNMS(KP356895867, TP, TO);
Chris@82 146 TW = FNMS(KP692021471, TV, TQ);
Chris@82 147 Cr[WS(csr, 6)] = FNMS(KP900968867, TW, TN);
Chris@82 148 TR = FNMS(KP356895867, TQ, TP);
Chris@82 149 TS = FNMS(KP692021471, TR, TO);
Chris@82 150 Cr[WS(csr, 4)] = FNMS(KP900968867, TS, TN);
Chris@82 151 }
Chris@82 152 }
Chris@82 153 }
Chris@82 154 }
Chris@82 155
Chris@82 156 static const kr2c_desc desc = { 14, "r2cf_14", {32, 6, 30, 0}, &GENUS }; /* descriptor handed to X(kr2c_register) below; 32, 6, 30 match the additions / multiplications / fused multiply-add counts quoted in this variant's header comment. kr2c_desc and GENUS are declared outside this file; field meanings (presumably size, name, op counts, genus) should be confirmed there. */
Chris@82 157
Chris@82 158 void X(codelet_r2cf_14) (planner *p) { /* registration entry point for the FMA variant; X(...) is a macro defined outside this file (presumably name prefixing -- confirm) */
Chris@82 159 X(kr2c_register) (p, r2cf_14, &desc); /* passes planner p, the static kernel r2cf_14 and its descriptor to the registration routine */
Chris@82 160 }
Chris@82 161
Chris@82 162 #else
Chris@82 163
Chris@82 164 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 14 -name r2cf_14 -include rdft/scalar/r2cf.h */
Chris@82 165
Chris@82 166 /*
Chris@82 167 * This function contains 62 FP additions, 36 FP multiplications,
Chris@82 168 * (or, 38 additions, 12 multiplications, 24 fused multiply/add),
Chris@82 169 * 29 stack variables, 6 constants, and 28 memory accesses
Chris@82 170 */
Chris@82 171 #include "rdft/scalar/r2cf.h"
Chris@82 172
Chris@82 173 static void r2cf_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* Variant compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined. Each loop pass reads 14 inputs (R0[0], R1[0] and offsets WS(rs, 1..6) of both R0 and R1) and writes 14 outputs (Cr[0], Cr[WS(csr, 1..7)], Ci[WS(csi, 1..6)]). Judging by the name and the descriptor this is presumably the size-14 real-to-complex forward DFT kernel, with R0/R1 holding even/odd-indexed samples -- TODO confirm against rdft/codelet-rdft.h. */
Chris@82 174 {
Chris@82 175 DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
Chris@82 176 DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* numerically cos(3*pi/7) */
Chris@82 177 DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* numerically cos(2*pi/7) */
Chris@82 178 DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* numerically sin(pi/7) */
Chris@82 179 DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
Chris@82 180 DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* numerically sin(2*pi/7) */
Chris@82 181 {
Chris@82 182 INT i;
Chris@82 183 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { /* v passes; after each pass R0/R1 advance by ivs and Cr/Ci by ovs. MAKE_VOLATILE_STRIDE is defined outside this file. */
Chris@82 184 E T3, TB, T6, Tv, Tn, Ts, Tk, Tt, Td, Ty, T9, Tw, Tg, Tz, T1;
Chris@82 185 E T2;
Chris@82 186 T1 = R0[0];
Chris@82 187 T2 = R1[WS(rs, 3)];
Chris@82 188 T3 = T1 - T2; /* difference of the first input pair */
Chris@82 189 TB = T1 + T2; /* sum of the first input pair */
Chris@82 190 { /* pairs (R0[2], R1[5]) and (R0[6], R1[2]): keep both the difference and the sum of each */
Chris@82 191 E T4, T5, Tl, Tm;
Chris@82 192 T4 = R0[WS(rs, 2)];
Chris@82 193 T5 = R1[WS(rs, 5)];
Chris@82 194 T6 = T4 - T5;
Chris@82 195 Tv = T4 + T5;
Chris@82 196 Tl = R0[WS(rs, 6)];
Chris@82 197 Tm = R1[WS(rs, 2)];
Chris@82 198 Tn = Tl - Tm;
Chris@82 199 Ts = Tl + Tm;
Chris@82 200 }
Chris@82 201 { /* pairs (R0[1], R1[4]) and (R0[3], R1[6]) */
Chris@82 202 E Ti, Tj, Tb, Tc;
Chris@82 203 Ti = R0[WS(rs, 1)];
Chris@82 204 Tj = R1[WS(rs, 4)];
Chris@82 205 Tk = Ti - Tj;
Chris@82 206 Tt = Ti + Tj;
Chris@82 207 Tb = R0[WS(rs, 3)];
Chris@82 208 Tc = R1[WS(rs, 6)];
Chris@82 209 Td = Tb - Tc;
Chris@82 210 Ty = Tb + Tc;
Chris@82 211 }
Chris@82 212 { /* pairs (R0[5], R1[1]) and (R0[4], R1[0]) */
Chris@82 213 E T7, T8, Te, Tf;
Chris@82 214 T7 = R0[WS(rs, 5)];
Chris@82 215 T8 = R1[WS(rs, 1)];
Chris@82 216 T9 = T7 - T8;
Chris@82 217 Tw = T7 + T8;
Chris@82 218 Te = R0[WS(rs, 4)];
Chris@82 219 Tf = R1[0];
Chris@82 220 Tg = Te - Tf;
Chris@82 221 Tz = Te + Tf;
Chris@82 222 }
Chris@82 223 { /* odd-index outputs (Ci 1, 3, 5 and Cr 1, 3, 5, 7), all derived from the pair differences */
Chris@82 224 E Tp, Tr, Tq, Ta, To, Th;
Chris@82 225 Tp = Tn - Tk;
Chris@82 226 Tr = Tg - Td;
Chris@82 227 Tq = T9 - T6;
Chris@82 228 Ci[WS(csi, 1)] = FMA(KP781831482, Tp, KP974927912 * Tq) + (KP433883739 * Tr); /* FMA/FNMS/FNMA are macros defined outside this file (presumably a*b+c, c-a*b and -(a*b)-c -- confirm) */
Chris@82 229 Ci[WS(csi, 5)] = FMA(KP433883739, Tq, KP781831482 * Tr) - (KP974927912 * Tp);
Chris@82 230 Ci[WS(csi, 3)] = FMA(KP433883739, Tp, KP974927912 * Tr) - (KP781831482 * Tq);
Chris@82 231 Ta = T6 + T9;
Chris@82 232 To = Tk + Tn;
Chris@82 233 Th = Td + Tg;
Chris@82 234 Cr[WS(csr, 3)] = FMA(KP623489801, Ta, T3) + FNMA(KP222520933, Th, KP900968867 * To);
Chris@82 235 Cr[WS(csr, 7)] = T3 + To + Ta + Th; /* all seven pair differences: sum of the R0 inputs minus sum of the R1 inputs */
Chris@82 236 Cr[WS(csr, 1)] = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta);
Chris@82 237 Cr[WS(csr, 5)] = FMA(KP623489801, Th, T3) + FNMA(KP900968867, Ta, KP222520933 * To);
Chris@82 238 }
Chris@82 239 { /* even-index outputs (Ci 2, 4, 6 and Cr 0, 2, 4, 6), all derived from the pair sums */
Chris@82 240 E Tu, TA, Tx, TC, TE, TD;
Chris@82 241 Tu = Ts - Tt;
Chris@82 242 TA = Ty - Tz;
Chris@82 243 Tx = Tv - Tw;
Chris@82 244 Ci[WS(csi, 2)] = FMA(KP974927912, Tu, KP433883739 * Tx) + (KP781831482 * TA);
Chris@82 245 Ci[WS(csi, 6)] = FMA(KP974927912, Tx, KP433883739 * TA) - (KP781831482 * Tu);
Chris@82 246 Ci[WS(csi, 4)] = FNMS(KP781831482, Tx, KP974927912 * TA) - (KP433883739 * Tu);
Chris@82 247 TC = Tt + Ts;
Chris@82 248 TE = Tv + Tw;
Chris@82 249 TD = Ty + Tz;
Chris@82 250 Cr[WS(csr, 6)] = FMA(KP623489801, TC, TB) + FNMA(KP900968867, TD, KP222520933 * TE);
Chris@82 251 Cr[WS(csr, 2)] = FMA(KP623489801, TD, TB) + FNMA(KP900968867, TE, KP222520933 * TC);
Chris@82 252 Cr[WS(csr, 4)] = FMA(KP623489801, TE, TB) + FNMA(KP222520933, TD, KP900968867 * TC);
Chris@82 253 Cr[0] = TB + TC + TE + TD; /* all seven pair sums: sum of all 14 inputs */
Chris@82 254 }
Chris@82 255 }
Chris@82 256 }
Chris@82 257 }
Chris@82 258
Chris@82 259 static const kr2c_desc desc = { 14, "r2cf_14", {38, 12, 24, 0}, &GENUS }; /* descriptor handed to X(kr2c_register) below; 38, 12, 24 match the additions / multiplications / fused multiply-add counts quoted in this variant's header comment. kr2c_desc and GENUS are declared outside this file; field meanings (presumably size, name, op counts, genus) should be confirmed there. */
Chris@82 260
Chris@82 261 void X(codelet_r2cf_14) (planner *p) { /* registration entry point for the non-FMA variant; X(...) is a macro defined outside this file (presumably name prefixing -- confirm) */
Chris@82 262 X(kr2c_register) (p, r2cf_14, &desc); /* passes planner p, the static kernel r2cf_14 and its descriptor to the registration routine */
Chris@82 263 }
Chris@82 264
Chris@82 265 #endif