annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cb_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:28 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 14 -name r2cb_14 -include r2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 62 FP additions, 44 FP multiplications,
Chris@42 32 * (or, 18 additions, 0 multiplications, 44 fused multiply/add),
Chris@42 33 * 58 stack variables, 7 constants, and 28 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cb.h"
Chris@42 36
/*
 * r2cb_14 (HAVE_FMA variant): generated size-14 kernel; the genfft command
 * line recorded above this function was run with -fma -n 14 -sign 1.
 *
 * Per loop iteration it performs 14 loads and 14 stores:
 *   loads : Cr[0], Cr[WS(csr, 1..7)], Ci[WS(csi, 1..6)]
 *   stores: R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..6)]
 * Ci[0] and Ci[WS(csi, 7)] are never read.
 *
 * Parameters:
 *   R0, R1       - output arrays, indexed through WS(rs, k); both advance by
 *                  ovs after every iteration
 *   Cr, Ci       - input arrays, indexed through WS(csr, k) and WS(csi, k);
 *                  both advance by ivs after every iteration
 *   rs, csr, csi - stride arguments handed to WS()
 *   v            - iteration count; the body does not run when v <= 0
 *   ivs, ovs     - per-iteration pointer increments for inputs and outputs
 *
 * NOTE(review): presumably Cr/Ci are the real/imaginary parts of a
 * halfcomplex input and R0/R1 the even/odd-indexed real outputs, as the
 * "r2cb" name suggests - confirm against codelet-rdft.h / r2cb.h.
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in the visible source; presumably
 * FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b - confirm in the
 * FFTW headers.
 */
Chris@42 37 static void r2cb_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Constants declared through DK(); each identifier spells out the leading digits of its value. */
Chris@42 39 DK(KP1_949855824, +1.949855824363647214036263365987862434465571601);
Chris@42 40 DK(KP1_801937735, +1.801937735804838252472204639014890102331838324);
Chris@42 41 DK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@42 42 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 43 DK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@42 44 DK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@42 45 DK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@42 46 {
Chris@42 47 INT i;
/* i counts down from v; after each pass the output pointers step by ovs and the input pointers by ivs. MAKE_VOLATILE_STRIDE is invoked on each stride in the increment clause (its effect is not visible here). */
Chris@42 48 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) {
/* Declared at loop-body scope because these values are consumed by the final stores after the nested block below has closed. */
Chris@42 49 E Te, TO, TT, TG, TJ, TD, TR, TE;
Chris@42 50 {
Chris@42 51 E T3, TK, To, TM, Tu, TL, Tr, TS, TA, TN, TX, TF, Tv, T7, Tf;
Chris@42 52 E T6, Th, Tc, T8, T1, T2;
Chris@42 53 T1 = Cr[0];
Chris@42 54 T2 = Cr[WS(csr, 7)];
Chris@42 55 {
Chris@42 56 E Ts, Tt, Tp, Tq, Tm, Tn;
Chris@42 57 Tm = Ci[WS(csi, 4)];
Chris@42 58 Tn = Ci[WS(csi, 3)];
Chris@42 59 Ts = Ci[WS(csi, 6)];
/* Te / T3: sum and difference of Cr[0] and Cr[WS(csr, 7)]. */
Chris@42 60 Te = T1 + T2;
Chris@42 61 T3 = T1 - T2;
/* TK / To: sum and difference of the Ci entries at indices 4 and 3. */
Chris@42 62 TK = Tm + Tn;
Chris@42 63 To = Tm - Tn;
Chris@42 64 Tt = Ci[WS(csi, 1)];
Chris@42 65 Tp = Ci[WS(csi, 2)];
Chris@42 66 Tq = Ci[WS(csi, 5)];
Chris@42 67 {
Chris@42 68 E T4, T5, Ta, Tb;
Chris@42 69 T4 = Cr[WS(csr, 2)];
/* TM / Tu pair Ci indices 6 and 1; TL / Tr pair Ci indices 2 and 5. */
Chris@42 70 TM = Ts + Tt;
Chris@42 71 Tu = Ts - Tt;
Chris@42 72 TL = Tp + Tq;
Chris@42 73 Tr = Tp - Tq;
/* Combinations of the Ci pair sums (TS, TN, TX) and pair differences (TA, TF, Tv), all scaled by KP554958132. */
Chris@42 74 TS = FMA(KP554958132, TK, TM);
Chris@42 75 TA = FMA(KP554958132, To, Tu);
Chris@42 76 TN = FMA(KP554958132, TM, TL);
Chris@42 77 TX = FNMS(KP554958132, TL, TK);
Chris@42 78 TF = FNMS(KP554958132, Tr, To);
Chris@42 79 Tv = FMA(KP554958132, Tu, Tr);
Chris@42 80 T5 = Cr[WS(csr, 5)];
Chris@42 81 Ta = Cr[WS(csr, 6)];
Chris@42 82 Tb = Cr[WS(csr, 1)];
Chris@42 83 T7 = Cr[WS(csr, 4)];
/* Tf / T6 pair Cr indices 2 and 5; Th / Tc pair Cr indices 6 and 1. */
Chris@42 84 Tf = T4 + T5;
Chris@42 85 T6 = T4 - T5;
Chris@42 86 Th = Ta + Tb;
Chris@42 87 Tc = Ta - Tb;
Chris@42 88 T8 = Cr[WS(csr, 3)];
Chris@42 89 }
Chris@42 90 }
Chris@42 91 {
Chris@42 92 E Tw, Tx, TP, Tg, T9, TY, TC, TI, TQ;
Chris@42 93 Tw = FMA(KP801937735, Tv, To);
Chris@42 94 Tx = FNMS(KP356895867, Tf, Th);
Chris@42 95 TP = FNMS(KP356895867, T6, Tc);
/* Tg / T9 pair Cr indices 4 and 3. */
Chris@42 96 Tg = T7 + T8;
Chris@42 97 T9 = T7 - T8;
Chris@42 98 TY = FNMS(KP801937735, TX, TM);
Chris@42 99 {
Chris@42 100 E TB, TH, TV, Ty, Tl, Ti, TW, Tz;
Chris@42 101 TB = FNMS(KP801937735, TA, Tr);
/* Ti: total of the three Cr pair sums. */
Chris@42 102 Ti = Tf + Tg + Th;
Chris@42 103 TC = FNMS(KP356895867, Th, Tg);
Chris@42 104 {
Chris@42 105 E Tj, Td, TU, Tk;
Chris@42 106 Tj = FNMS(KP356895867, Tg, Tf);
/* Td: total of the three Cr pair differences. */
Chris@42 107 Td = T6 + T9 + Tc;
Chris@42 108 TH = FNMS(KP356895867, T9, T6);
Chris@42 109 TU = FNMS(KP356895867, Tc, T9);
/* The two outputs that depend only on Cr: R0[0] from Ti and Te, R1[WS(rs, 3)] from Td and T3. */
Chris@42 110 R0[0] = FMA(KP2_000000000, Ti, Te);
Chris@42 111 Tk = FNMS(KP692021471, Tj, Th);
Chris@42 112 R1[WS(rs, 3)] = FMA(KP2_000000000, Td, T3);
Chris@42 113 TV = FNMS(KP692021471, TU, T6);
Chris@42 114 Ty = FNMS(KP692021471, Tx, Tg);
Chris@42 115 Tl = FNMS(KP1_801937735, Tk, Te);
Chris@42 116 }
Chris@42 117 TO = FMA(KP801937735, TN, TK);
Chris@42 118 TW = FNMS(KP1_801937735, TV, T3);
Chris@42 119 Tz = FNMS(KP1_801937735, Ty, Te);
/* Outputs are written in FMA/FNMS pairs: each pair shares one Cr-derived term and one Ci-derived term scaled by KP1_949855824. */
Chris@42 120 R0[WS(rs, 3)] = FMA(KP1_949855824, Tw, Tl);
Chris@42 121 R0[WS(rs, 4)] = FNMS(KP1_949855824, Tw, Tl);
Chris@42 122 R1[WS(rs, 5)] = FMA(KP1_949855824, TY, TW);
Chris@42 123 R1[WS(rs, 1)] = FNMS(KP1_949855824, TY, TW);
Chris@42 124 R0[WS(rs, 6)] = FMA(KP1_949855824, TB, Tz);
Chris@42 125 R0[WS(rs, 1)] = FNMS(KP1_949855824, TB, Tz);
Chris@42 126 TI = FNMS(KP692021471, TH, Tc);
Chris@42 127 }
/* Values below feed the stores issued after the nested block closes. */
Chris@42 128 TT = FNMS(KP801937735, TS, TL);
Chris@42 129 TQ = FNMS(KP692021471, TP, T9);
Chris@42 130 TG = FNMS(KP801937735, TF, Tu);
Chris@42 131 TJ = FNMS(KP1_801937735, TI, T3);
Chris@42 132 TD = FNMS(KP692021471, TC, Tf);
Chris@42 133 TR = FNMS(KP1_801937735, TQ, T3);
Chris@42 134 }
Chris@42 135 }
/* Remaining three output pairs, built from the loop-scope temporaries. */
Chris@42 136 R1[WS(rs, 6)] = FMA(KP1_949855824, TO, TJ);
Chris@42 137 R1[0] = FNMS(KP1_949855824, TO, TJ);
Chris@42 138 TE = FNMS(KP1_801937735, TD, Te);
Chris@42 139 R1[WS(rs, 2)] = FMA(KP1_949855824, TT, TR);
Chris@42 140 R1[WS(rs, 4)] = FNMS(KP1_949855824, TT, TR);
Chris@42 141 R0[WS(rs, 2)] = FMA(KP1_949855824, TG, TE);
Chris@42 142 R0[WS(rs, 5)] = FNMS(KP1_949855824, TG, TE);
Chris@42 143 }
Chris@42 144 }
Chris@42 145 }
Chris@42 146
/*
 * Descriptor for the HAVE_FMA kernel: size 14, name "r2cb_14". The first
 * three numbers in the inner initializer (18, 0, 44) match the additions /
 * multiplications / fused multiply-add counts reported in the generator
 * comment for this variant. GENUS is defined outside the visible source
 * (presumably in r2cb.h - confirm).
 */
Chris@42 147 static const kr2c_desc desc = { 14, "r2cb_14", {18, 0, 44, 0}, &GENUS };
Chris@42 148
/*
 * Registration entry point (HAVE_FMA build): passes planner p, the static
 * r2cb_14 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined elsewhere; presumably it decorates
 * the identifier with a library/precision prefix - confirm.
 */
Chris@42 149 void X(codelet_r2cb_14) (planner *p) {
Chris@42 150 X(kr2c_register) (p, r2cb_14, &desc);
Chris@42 151 }
Chris@42 152
Chris@42 153 #else /* HAVE_FMA */
Chris@42 154
Chris@42 155 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 14 -name r2cb_14 -include r2cb.h */
Chris@42 156
Chris@42 157 /*
Chris@42 158 * This function contains 62 FP additions, 38 FP multiplications,
Chris@42 159 * (or, 36 additions, 12 multiplications, 26 fused multiply/add),
Chris@42 160 * 28 stack variables, 7 constants, and 28 memory accesses
Chris@42 161 */
Chris@42 162 #include "r2cb.h"
Chris@42 163
/*
 * r2cb_14 (variant compiled when HAVE_FMA is not defined): generated
 * size-14 kernel; the genfft command line recorded above this function was
 * run with -n 14 -sign 1 and without -fma.
 *
 * Per loop iteration it performs 14 loads and 14 stores:
 *   loads : Cr[0], Cr[WS(csr, 1..7)], Ci[WS(csi, 1..6)]
 *   stores: R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..6)]
 * Ci[0] and Ci[WS(csi, 7)] are never read.
 *
 * Parameters:
 *   R0, R1       - output arrays, indexed through WS(rs, k); both advance by
 *                  ovs after every iteration
 *   Cr, Ci       - input arrays, indexed through WS(csr, k) and WS(csi, k);
 *                  both advance by ivs after every iteration
 *   rs, csr, csi - stride arguments handed to WS()
 *   v            - iteration count; the body does not run when v <= 0
 *   ivs, ovs     - per-iteration pointer increments for inputs and outputs
 *
 * NOTE(review): presumably Cr/Ci are the real/imaginary parts of a
 * halfcomplex input and R0/R1 the even/odd-indexed real outputs, as the
 * "r2cb" name suggests - confirm against codelet-rdft.h / r2cb.h.
 * NOTE(review): this variant still calls the FMA, FNMS and FNMA macros,
 * mixed with explicit multiplications; the macros (and E, R, INT, stride,
 * DK, WS, MAKE_VOLATILE_STRIDE) are not defined in the visible source -
 * confirm their expansion in the FFTW headers.
 */
Chris@42 164 static void r2cb_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 165 {
/* Constants declared through DK(); each identifier spells out the leading digits of its value. Numerically, the six non-trivial values equal 2*cos(k*pi/7) and 2*sin(k*pi/7) for k = 1, 2, 3. */
Chris@42 166 DK(KP1_801937735, +1.801937735804838252472204639014890102331838324);
Chris@42 167 DK(KP445041867, +0.445041867912628808577805128993589518932711138);
Chris@42 168 DK(KP1_246979603, +1.246979603717467061050009768008479621264549462);
Chris@42 169 DK(KP867767478, +0.867767478235116240951536665696717509219981456);
Chris@42 170 DK(KP1_949855824, +1.949855824363647214036263365987862434465571601);
Chris@42 171 DK(KP1_563662964, +1.563662964936059617416889053348115500464669037);
Chris@42 172 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 173 {
Chris@42 174 INT i;
/* i counts down from v; after each pass the output pointers step by ovs and the input pointers by ivs. MAKE_VOLATILE_STRIDE is invoked on each stride in the increment clause (its effect is not visible here). */
Chris@42 175 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) {
Chris@42 176 E T3, Td, T6, Te, Tq, Tz, Tn, Ty, Tc, Tg, Tk, Tx, T9, Tf, T1;
Chris@42 177 E T2;
/* T3 / Td: difference and sum of Cr[0] and Cr[WS(csr, 7)]. */
Chris@42 178 T1 = Cr[0];
Chris@42 179 T2 = Cr[WS(csr, 7)];
Chris@42 180 T3 = T1 - T2;
Chris@42 181 Td = T1 + T2;
/* Index pair (2, 5): T6 / Te from Cr, Tq / Tz from Ci (difference / sum). */
Chris@42 182 {
Chris@42 183 E T4, T5, To, Tp;
Chris@42 184 T4 = Cr[WS(csr, 2)];
Chris@42 185 T5 = Cr[WS(csr, 5)];
Chris@42 186 T6 = T4 - T5;
Chris@42 187 Te = T4 + T5;
Chris@42 188 To = Ci[WS(csi, 2)];
Chris@42 189 Tp = Ci[WS(csi, 5)];
Chris@42 190 Tq = To - Tp;
Chris@42 191 Tz = To + Tp;
Chris@42 192 }
/* Index pair (6, 1): Tn / Ty from Ci, Tc / Tg from Cr (difference / sum). */
Chris@42 193 {
Chris@42 194 E Tl, Tm, Ta, Tb;
Chris@42 195 Tl = Ci[WS(csi, 6)];
Chris@42 196 Tm = Ci[WS(csi, 1)];
Chris@42 197 Tn = Tl - Tm;
Chris@42 198 Ty = Tl + Tm;
Chris@42 199 Ta = Cr[WS(csr, 6)];
Chris@42 200 Tb = Cr[WS(csr, 1)];
Chris@42 201 Tc = Ta - Tb;
Chris@42 202 Tg = Ta + Tb;
Chris@42 203 }
/* Index pair (4, 3): Tk / Tx from Ci, T9 / Tf from Cr (difference / sum). */
Chris@42 204 {
Chris@42 205 E Ti, Tj, T7, T8;
Chris@42 206 Ti = Ci[WS(csi, 4)];
Chris@42 207 Tj = Ci[WS(csi, 3)];
Chris@42 208 Tk = Ti - Tj;
Chris@42 209 Tx = Ti + Tj;
Chris@42 210 T7 = Cr[WS(csr, 4)];
Chris@42 211 T8 = Cr[WS(csr, 3)];
Chris@42 212 T9 = T7 - T8;
Chris@42 213 Tf = T7 + T8;
Chris@42 214 }
/* The two outputs that depend only on Cr: built from the totals of the Cr pair differences (with T3) and pair sums (with Td). */
Chris@42 215 R1[WS(rs, 3)] = FMA(KP2_000000000, T6 + T9 + Tc, T3);
Chris@42 216 R0[0] = FMA(KP2_000000000, Te + Tf + Tg, Td);
/* Each block below emits two output pairs; every pair is (Cr-derived term) minus / plus (Ci-derived term). */
Chris@42 217 {
Chris@42 218 E Tr, Th, TE, TD;
Chris@42 219 Tr = FNMS(KP1_949855824, Tn, KP1_563662964 * Tk) - (KP867767478 * Tq);
Chris@42 220 Th = FMA(KP1_246979603, Tf, Td) + FNMA(KP445041867, Tg, KP1_801937735 * Te);
Chris@42 221 R0[WS(rs, 2)] = Th - Tr;
Chris@42 222 R0[WS(rs, 5)] = Th + Tr;
Chris@42 223 TE = FMA(KP867767478, Tx, KP1_563662964 * Ty) - (KP1_949855824 * Tz);
Chris@42 224 TD = FMA(KP1_246979603, Tc, T3) + FNMA(KP1_801937735, T9, KP445041867 * T6);
Chris@42 225 R1[WS(rs, 2)] = TD - TE;
Chris@42 226 R1[WS(rs, 4)] = TD + TE;
Chris@42 227 }
Chris@42 228 {
Chris@42 229 E Tt, Ts, TA, Tw;
Chris@42 230 Tt = FMA(KP867767478, Tk, KP1_563662964 * Tn) - (KP1_949855824 * Tq);
Chris@42 231 Ts = FMA(KP1_246979603, Tg, Td) + FNMA(KP1_801937735, Tf, KP445041867 * Te);
Chris@42 232 R0[WS(rs, 6)] = Ts - Tt;
Chris@42 233 R0[WS(rs, 1)] = Ts + Tt;
Chris@42 234 TA = FNMS(KP1_949855824, Ty, KP1_563662964 * Tx) - (KP867767478 * Tz);
Chris@42 235 Tw = FMA(KP1_246979603, T9, T3) + FNMA(KP445041867, Tc, KP1_801937735 * T6);
Chris@42 236 R1[WS(rs, 5)] = Tw - TA;
Chris@42 237 R1[WS(rs, 1)] = Tw + TA;
Chris@42 238 }
Chris@42 239 {
Chris@42 240 E TC, TB, Tv, Tu;
Chris@42 241 TC = FMA(KP1_563662964, Tz, KP1_949855824 * Tx) + (KP867767478 * Ty);
Chris@42 242 TB = FMA(KP1_246979603, T6, T3) + FNMA(KP1_801937735, Tc, KP445041867 * T9);
Chris@42 243 R1[0] = TB - TC;
Chris@42 244 R1[WS(rs, 6)] = TB + TC;
Chris@42 245 Tv = FMA(KP1_563662964, Tq, KP1_949855824 * Tk) + (KP867767478 * Tn);
Chris@42 246 Tu = FMA(KP1_246979603, Te, Td) + FNMA(KP1_801937735, Tg, KP445041867 * Tf);
Chris@42 247 R0[WS(rs, 4)] = Tu - Tv;
Chris@42 248 R0[WS(rs, 3)] = Tu + Tv;
Chris@42 249 }
Chris@42 250 }
Chris@42 251 }
Chris@42 252 }
Chris@42 253
/*
 * Descriptor for the non-FMA kernel: size 14, name "r2cb_14". The first
 * three numbers in the inner initializer (36, 12, 26) match the additions /
 * multiplications / fused multiply-add counts reported in the generator
 * comment for this variant. GENUS is defined outside the visible source
 * (presumably in r2cb.h - confirm).
 */
Chris@42 254 static const kr2c_desc desc = { 14, "r2cb_14", {36, 12, 26, 0}, &GENUS };
Chris@42 255
/*
 * Registration entry point (build without HAVE_FMA): passes planner p, the
 * static r2cb_14 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined elsewhere; presumably it decorates
 * the identifier with a library/precision prefix - confirm.
 */
Chris@42 256 void X(codelet_r2cb_14) (planner *p) {
Chris@42 257 X(kr2c_register) (p, r2cb_14, &desc);
Chris@42 258 }
Chris@42 259
Chris@42 260 #endif /* HAVE_FMA */