annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cb_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:28 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 14 -name r2cb_14 -include rdft/scalar/r2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 62 FP additions, 44 FP multiplications,
Chris@82 32 * (or, 18 additions, 0 multiplications, 44 fused multiply/add),
Chris@82 33 * 46 stack variables, 7 constants, and 28 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cb.h"
Chris@82 36
/*
 * r2cb_14, FMA variant: compiled only when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined. Machine-generated straight-line
 * kernel (the generator command line above uses "-n 14 -sign 1").
 *
 * The loop runs v times. Each pass reads Cr[0] and Cr[WS(csr, 1..7)]
 * (8 values) plus Ci[WS(csi, 1..6)] (6 values), and writes R0[0],
 * R0[WS(rs, 1..6)], R1[0] and R1[WS(rs, 1..6)] (14 values). Ci at
 * offsets 0 and 7 is never read. After each pass R0 and R1 advance by
 * ovs, and Cr and Ci advance by ivs.
 *
 * Inputs are combined in pairs whose offsets add up to 7. Every R0
 * output is built from sums of Cr pairs and differences of Ci pairs;
 * every R1 output is built from differences of Cr pairs and sums of
 * Ci pairs.
 *
 * NOTE(review): DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * headers outside this file. Presumably FMA(a, b, c) is a*b + c and
 * FNMS(a, b, c) is c - a*b -- confirm before relying on that reading.
 * Statement order and FMA grouping affect rounding; do not reorder.
 */
Chris@82 37 static void r2cb_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DK(KP1_949855824, +1.949855824363647214036263365987862434465571601);
Chris@82 40 DK(KP1_801937735, +1.801937735804838252472204639014890102331838324);
Chris@82 41 DK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@82 42 DK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@82 43 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 44 DK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@82 45 DK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@82 46 {
Chris@82 47 INT i;
Chris@82 48 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { /* counts i down from v; outputs step by ovs, inputs by ivs */
Chris@82 49 E T3, Te, To, TK, Tu, TM, Tr, TL, Tv, TA, TX, TS, TN, TF, T6;
Chris@82 50 E Tf, Tc, Th, T9, Tg, Tj, Tx, TU, TP, TH, TC, T1, T2, Td, Ti;
Chris@82 51 T1 = Cr[0];
Chris@82 52 T2 = Cr[WS(csr, 7)];
Chris@82 53 T3 = T1 - T2; /* base term of every R1 output */
Chris@82 54 Te = T1 + T2; /* base term of every R0 output */
Chris@82 55 {
Chris@82 56 E Tm, Tn, T4, T5;
Chris@82 57 Tm = Ci[WS(csi, 4)];
Chris@82 58 Tn = Ci[WS(csi, 3)];
Chris@82 59 To = Tm - Tn;
Chris@82 60 TK = Tm + Tn;
Chris@82 61 {
Chris@82 62 E Ts, Tt, Tp, Tq;
Chris@82 63 Ts = Ci[WS(csi, 6)];
Chris@82 64 Tt = Ci[WS(csi, 1)];
Chris@82 65 Tu = Ts - Tt;
Chris@82 66 TM = Ts + Tt;
Chris@82 67 Tp = Ci[WS(csi, 2)];
Chris@82 68 Tq = Ci[WS(csi, 5)];
Chris@82 69 Tr = Tp - Tq;
Chris@82 70 TL = Tp + Tq;
Chris@82 71 }
Chris@82 72 Tv = FMA(KP554958132, Tu, Tr); /* Tv, TA, TF: Ci differences, used by R0 outputs */
Chris@82 73 TA = FMA(KP554958132, To, Tu);
Chris@82 74 TX = FNMS(KP554958132, TL, TK); /* TX, TS, TN: Ci sums, used by R1 outputs */
Chris@82 75 TS = FMA(KP554958132, TK, TM);
Chris@82 76 TN = FMA(KP554958132, TM, TL);
Chris@82 77 TF = FNMS(KP554958132, Tr, To);
Chris@82 78 T4 = Cr[WS(csr, 2)];
Chris@82 79 T5 = Cr[WS(csr, 5)];
Chris@82 80 T6 = T4 - T5;
Chris@82 81 Tf = T4 + T5;
Chris@82 82 {
Chris@82 83 E Ta, Tb, T7, T8;
Chris@82 84 Ta = Cr[WS(csr, 6)];
Chris@82 85 Tb = Cr[WS(csr, 1)];
Chris@82 86 Tc = Ta - Tb;
Chris@82 87 Th = Ta + Tb;
Chris@82 88 T7 = Cr[WS(csr, 4)];
Chris@82 89 T8 = Cr[WS(csr, 3)];
Chris@82 90 T9 = T7 - T8;
Chris@82 91 Tg = T7 + T8;
Chris@82 92 }
Chris@82 93 Tj = FNMS(KP356895867, Tg, Tf); /* Tj, Tx, TC: Cr sums, used by R0 outputs */
Chris@82 94 Tx = FNMS(KP356895867, Tf, Th);
Chris@82 95 TU = FNMS(KP356895867, Tc, T9); /* TU, TP, TH: Cr differences, used by R1 outputs */
Chris@82 96 TP = FNMS(KP356895867, T6, Tc);
Chris@82 97 TH = FNMS(KP356895867, T9, T6);
Chris@82 98 TC = FNMS(KP356895867, Th, Tg);
Chris@82 99 }
Chris@82 100 Td = T6 + T9 + Tc;
Chris@82 101 R1[WS(rs, 3)] = FMA(KP2_000000000, Td, T3); /* uses only Cr differences */
Chris@82 102 Ti = Tf + Tg + Th;
Chris@82 103 R0[0] = FMA(KP2_000000000, Ti, Te); /* uses only Cr sums */
Chris@82 104 {
Chris@82 105 E Tw, Tl, Tk, TY, TW, TV;
Chris@82 106 Tw = FMA(KP801937735, Tv, To);
Chris@82 107 Tk = FNMS(KP692021471, Tj, Th);
Chris@82 108 Tl = FNMS(KP1_801937735, Tk, Te);
Chris@82 109 R0[WS(rs, 4)] = FNMS(KP1_949855824, Tw, Tl); /* each remaining output pair shares one Cr-derived and one Ci-derived term */
Chris@82 110 R0[WS(rs, 3)] = FMA(KP1_949855824, Tw, Tl);
Chris@82 111 TY = FNMS(KP801937735, TX, TM);
Chris@82 112 TV = FNMS(KP692021471, TU, T6);
Chris@82 113 TW = FNMS(KP1_801937735, TV, T3);
Chris@82 114 R1[WS(rs, 1)] = FNMS(KP1_949855824, TY, TW);
Chris@82 115 R1[WS(rs, 5)] = FMA(KP1_949855824, TY, TW);
Chris@82 116 }
Chris@82 117 {
Chris@82 118 E TB, Tz, Ty, TO, TJ, TI;
Chris@82 119 TB = FNMS(KP801937735, TA, Tr);
Chris@82 120 Ty = FNMS(KP692021471, Tx, Tg);
Chris@82 121 Tz = FNMS(KP1_801937735, Ty, Te);
Chris@82 122 R0[WS(rs, 1)] = FNMS(KP1_949855824, TB, Tz);
Chris@82 123 R0[WS(rs, 6)] = FMA(KP1_949855824, TB, Tz);
Chris@82 124 TO = FMA(KP801937735, TN, TK);
Chris@82 125 TI = FNMS(KP692021471, TH, Tc);
Chris@82 126 TJ = FNMS(KP1_801937735, TI, T3);
Chris@82 127 R1[0] = FNMS(KP1_949855824, TO, TJ);
Chris@82 128 R1[WS(rs, 6)] = FMA(KP1_949855824, TO, TJ);
Chris@82 129 }
Chris@82 130 {
Chris@82 131 E TT, TR, TQ, TG, TE, TD;
Chris@82 132 TT = FNMS(KP801937735, TS, TL);
Chris@82 133 TQ = FNMS(KP692021471, TP, T9);
Chris@82 134 TR = FNMS(KP1_801937735, TQ, T3);
Chris@82 135 R1[WS(rs, 4)] = FNMS(KP1_949855824, TT, TR);
Chris@82 136 R1[WS(rs, 2)] = FMA(KP1_949855824, TT, TR);
Chris@82 137 TG = FNMS(KP801937735, TF, Tu);
Chris@82 138 TD = FNMS(KP692021471, TC, Tf);
Chris@82 139 TE = FNMS(KP1_801937735, TD, Te);
Chris@82 140 R0[WS(rs, 5)] = FNMS(KP1_949855824, TG, TE);
Chris@82 141 R0[WS(rs, 2)] = FMA(KP1_949855824, TG, TE);
Chris@82 142 }
Chris@82 143 }
Chris@82 144 }
Chris@82 145 }
Chris@82 146
/*
 * Descriptor handed to X(kr2c_register) together with r2cb_14 (FMA build).
 * The inner {18, 0, 44, 0} agrees with the generated op-count comment for
 * this variant (18 additions, 0 multiplications, 44 fused multiply/add).
 * NOTE(review): kr2c_desc and GENUS are declared outside this file; the
 * leading 14 and the name string presumably give size and label -- confirm.
 */
Chris@82 147 static const kr2c_desc desc = { 14, "r2cb_14", {18, 0, 44, 0}, &GENUS };
Chris@82 148
/*
 * Non-static entry point for the FMA build: passes the planner p, the
 * static r2cb_14 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * symbol-name wrapper) -- confirm in the project headers.
 */
Chris@82 149 void X(codelet_r2cb_14) (planner *p) {
Chris@82 150 X(kr2c_register) (p, r2cb_14, &desc);
Chris@82 151 }
Chris@82 152
Chris@82 153 #else
Chris@82 154
Chris@82 155 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 14 -name r2cb_14 -include rdft/scalar/r2cb.h */
Chris@82 156
Chris@82 157 /*
Chris@82 158 * This function contains 62 FP additions, 38 FP multiplications,
Chris@82 159 * (or, 36 additions, 12 multiplications, 26 fused multiply/add),
Chris@82 160 * 28 stack variables, 7 constants, and 28 memory accesses
Chris@82 161 */
Chris@82 162 #include "rdft/scalar/r2cb.h"
Chris@82 163
/*
 * r2cb_14, non-FMA variant: compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined (the #else branch). Machine-generated
 * straight-line kernel (the generator command line above uses
 * "-n 14 -sign 1" without "-fma").
 *
 * The loop runs v times. Each pass reads Cr[0] and Cr[WS(csr, 1..7)]
 * (8 values) plus Ci[WS(csi, 1..6)] (6 values), and writes R0[0],
 * R0[WS(rs, 1..6)], R1[0] and R1[WS(rs, 1..6)] (14 values). Ci at
 * offsets 0 and 7 is never read. After each pass R0 and R1 advance by
 * ovs, and Cr and Ci advance by ivs.
 *
 * Inputs are combined in pairs whose offsets add up to 7. Every R0
 * output is built from sums of Cr pairs and differences of Ci pairs;
 * every R1 output is built from differences of Cr pairs and sums of
 * Ci pairs. Apart from R0[0] and R1[WS(rs, 3)], outputs are produced
 * two at a time as (a - b, a + b).
 *
 * NOTE(review): DK, E, WS, FMA, FNMS, FNMA and MAKE_VOLATILE_STRIDE come
 * from headers outside this file. Presumably FMA(a, b, c) is a*b + c,
 * FNMS(a, b, c) is c - a*b and FNMA(a, b, c) is -(a*b) - c -- confirm.
 * NOTE(review): the six non-trivial constants agree numerically (to at
 * least nine digits) with 2*cos(k*pi/7) and 2*sin(k*pi/7) for k = 1..3.
 * Statement order affects rounding; do not reorder.
 */
Chris@82 164 static void r2cb_14(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 165 {
Chris@82 166 DK(KP1_801937735, +1.801937735804838252472204639014890102331838324);
Chris@82 167 DK(KP445041867, +0.445041867912628808577805128993589518932711138);
Chris@82 168 DK(KP1_246979603, +1.246979603717467061050009768008479621264549462);
Chris@82 169 DK(KP867767478, +0.867767478235116240951536665696717509219981456);
Chris@82 170 DK(KP1_949855824, +1.949855824363647214036263365987862434465571601);
Chris@82 171 DK(KP1_563662964, +1.563662964936059617416889053348115500464669037);
Chris@82 172 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 173 {
Chris@82 174 INT i;
Chris@82 175 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(56, rs), MAKE_VOLATILE_STRIDE(56, csr), MAKE_VOLATILE_STRIDE(56, csi)) { /* counts i down from v; outputs step by ovs, inputs by ivs */
Chris@82 176 E T3, Td, T6, Te, Tq, Tz, Tn, Ty, Tc, Tg, Tk, Tx, T9, Tf, T1;
Chris@82 177 E T2;
Chris@82 178 T1 = Cr[0];
Chris@82 179 T2 = Cr[WS(csr, 7)];
Chris@82 180 T3 = T1 - T2; /* base term of every R1 output */
Chris@82 181 Td = T1 + T2; /* base term of every R0 output */
Chris@82 182 {
Chris@82 183 E T4, T5, To, Tp;
Chris@82 184 T4 = Cr[WS(csr, 2)];
Chris@82 185 T5 = Cr[WS(csr, 5)];
Chris@82 186 T6 = T4 - T5;
Chris@82 187 Te = T4 + T5;
Chris@82 188 To = Ci[WS(csi, 2)];
Chris@82 189 Tp = Ci[WS(csi, 5)];
Chris@82 190 Tq = To - Tp;
Chris@82 191 Tz = To + Tp;
Chris@82 192 }
Chris@82 193 {
Chris@82 194 E Tl, Tm, Ta, Tb;
Chris@82 195 Tl = Ci[WS(csi, 6)];
Chris@82 196 Tm = Ci[WS(csi, 1)];
Chris@82 197 Tn = Tl - Tm;
Chris@82 198 Ty = Tl + Tm;
Chris@82 199 Ta = Cr[WS(csr, 6)];
Chris@82 200 Tb = Cr[WS(csr, 1)];
Chris@82 201 Tc = Ta - Tb;
Chris@82 202 Tg = Ta + Tb;
Chris@82 203 }
Chris@82 204 {
Chris@82 205 E Ti, Tj, T7, T8;
Chris@82 206 Ti = Ci[WS(csi, 4)];
Chris@82 207 Tj = Ci[WS(csi, 3)];
Chris@82 208 Tk = Ti - Tj;
Chris@82 209 Tx = Ti + Tj;
Chris@82 210 T7 = Cr[WS(csr, 4)];
Chris@82 211 T8 = Cr[WS(csr, 3)];
Chris@82 212 T9 = T7 - T8;
Chris@82 213 Tf = T7 + T8;
Chris@82 214 }
Chris@82 215 R1[WS(rs, 3)] = FMA(KP2_000000000, T6 + T9 + Tc, T3); /* uses only Cr differences */
Chris@82 216 R0[0] = FMA(KP2_000000000, Te + Tf + Tg, Td); /* uses only Cr sums */
Chris@82 217 {
Chris@82 218 E Tr, Th, TE, TD;
Chris@82 219 Tr = FNMS(KP1_949855824, Tn, KP1_563662964 * Tk) - (KP867767478 * Tq); /* Ci differences only */
Chris@82 220 Th = FMA(KP1_246979603, Tf, Td) + FNMA(KP445041867, Tg, KP1_801937735 * Te); /* Cr sums only */
Chris@82 221 R0[WS(rs, 2)] = Th - Tr;
Chris@82 222 R0[WS(rs, 5)] = Th + Tr;
Chris@82 223 TE = FMA(KP867767478, Tx, KP1_563662964 * Ty) - (KP1_949855824 * Tz); /* Ci sums only */
Chris@82 224 TD = FMA(KP1_246979603, Tc, T3) + FNMA(KP1_801937735, T9, KP445041867 * T6); /* Cr differences only */
Chris@82 225 R1[WS(rs, 2)] = TD - TE;
Chris@82 226 R1[WS(rs, 4)] = TD + TE;
Chris@82 227 }
Chris@82 228 {
Chris@82 229 E Tt, Ts, TA, Tw;
Chris@82 230 Tt = FMA(KP867767478, Tk, KP1_563662964 * Tn) - (KP1_949855824 * Tq);
Chris@82 231 Ts = FMA(KP1_246979603, Tg, Td) + FNMA(KP1_801937735, Tf, KP445041867 * Te);
Chris@82 232 R0[WS(rs, 6)] = Ts - Tt;
Chris@82 233 R0[WS(rs, 1)] = Ts + Tt;
Chris@82 234 TA = FNMS(KP1_949855824, Ty, KP1_563662964 * Tx) - (KP867767478 * Tz);
Chris@82 235 Tw = FMA(KP1_246979603, T9, T3) + FNMA(KP445041867, Tc, KP1_801937735 * T6);
Chris@82 236 R1[WS(rs, 5)] = Tw - TA;
Chris@82 237 R1[WS(rs, 1)] = Tw + TA;
Chris@82 238 }
Chris@82 239 {
Chris@82 240 E TC, TB, Tv, Tu;
Chris@82 241 TC = FMA(KP1_563662964, Tz, KP1_949855824 * Tx) + (KP867767478 * Ty);
Chris@82 242 TB = FMA(KP1_246979603, T6, T3) + FNMA(KP1_801937735, Tc, KP445041867 * T9);
Chris@82 243 R1[0] = TB - TC;
Chris@82 244 R1[WS(rs, 6)] = TB + TC;
Chris@82 245 Tv = FMA(KP1_563662964, Tq, KP1_949855824 * Tk) + (KP867767478 * Tn);
Chris@82 246 Tu = FMA(KP1_246979603, Te, Td) + FNMA(KP1_801937735, Tg, KP445041867 * Tf);
Chris@82 247 R0[WS(rs, 4)] = Tu - Tv;
Chris@82 248 R0[WS(rs, 3)] = Tu + Tv;
Chris@82 249 }
Chris@82 250 }
Chris@82 251 }
Chris@82 252 }
Chris@82 253
/*
 * Descriptor handed to X(kr2c_register) together with r2cb_14 (non-FMA
 * build). The inner {36, 12, 26, 0} agrees with the generated op-count
 * comment for this variant (36 additions, 12 multiplications, 26 fused
 * multiply/add).
 * NOTE(review): kr2c_desc and GENUS are declared outside this file; the
 * leading 14 and the name string presumably give size and label -- confirm.
 */
Chris@82 254 static const kr2c_desc desc = { 14, "r2cb_14", {36, 12, 26, 0}, &GENUS };
Chris@82 255
/*
 * Non-static entry point for the non-FMA build: passes the planner p, the
 * static r2cb_14 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * symbol-name wrapper) -- confirm in the project headers.
 */
Chris@82 256 void X(codelet_r2cb_14) (planner *p) {
Chris@82 257 X(kr2c_register) (p, r2cb_14, &desc);
Chris@82 258 }
Chris@82 259
Chris@82 260 #endif