annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cb_11.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:26 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 11 -name r2cb_11 -include r2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 60 FP additions, 56 FP multiplications,
Chris@42 32 * (or, 4 additions, 0 multiplications, 56 fused multiply/add),
Chris@42 33 * 53 stack variables, 11 constants, and 22 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cb.h"
Chris@42 36
/*
 * r2cb_11 (HAVE_FMA variant): machine-generated size-11 codelet (see the
 * "Generated by" line above: -fma ... -sign 1 -n 11 -name r2cb_11).
 *
 * Each loop iteration reads Cr[0], Cr[WS(csr, 1..5)] and Ci[WS(csi, 1..5)]
 * (Ci[0] is never read) and writes R0[0], R0[WS(rs, 1..5)], R1[0] and
 * R1[WS(rs, 1..4)]: 11 loads and 11 stores in total.
 *
 * Parameters:
 *   R0, R1   - output arrays, indexed through stride rs
 *   Cr, Ci   - input arrays, indexed through strides csr and csi
 *   v        - number of loop iterations; nothing is done when v <= 0
 *   ivs      - amount added to Cr and Ci after every iteration
 *   ovs      - amount added to R0 and R1 after every iteration
 *
 * NOTE(review): presumably Cr/Ci hold the real/imaginary parts of a
 * half-complex spectrum and R0/R1 receive the even/odd-indexed real
 * outputs -- confirm against r2cb.h / codelet-rdft.h, which are not
 * visible here. R, E, INT, stride, WS, DK, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE all come from those project headers.
 */
Chris@42 37 static void r2cb_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Numeric constants used by the arithmetic below (declared via the project's DK macro). */
Chris@42 39 DK(KP1_979642883, +1.979642883761865464752184075553437574753038744);
Chris@42 40 DK(KP1_918985947, +1.918985947228994779780736114132655398124909697);
Chris@42 41 DK(KP876768831, +0.876768831002589333891339807079336796764054852);
Chris@42 42 DK(KP918985947, +0.918985947228994779780736114132655398124909697);
Chris@42 43 DK(KP778434453, +0.778434453334651800608337670740821884709317477);
Chris@42 44 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 45 DK(KP634356270, +0.634356270682424498893150776899916060542806975);
Chris@42 46 DK(KP342584725, +0.342584725681637509502641509861112333758894680);
Chris@42 47 DK(KP830830026, +0.830830026003772851058548298459246407048009821);
Chris@42 48 DK(KP715370323, +0.715370323453429719112414662767260662417897278);
Chris@42 49 DK(KP521108558, +0.521108558113202722944698153526659300680427422);
Chris@42 50 {
Chris@42 51 INT i;
/* i counts down from v; the output pointers advance by ovs and the input
 * pointers by ivs after each pass. MAKE_VOLATILE_STRIDE is an opaque project
 * macro whose effect is not visible in this file. */
Chris@42 52 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) {
/* Declared at loop scope because they are assigned inside the nested block
 * below and consumed after it closes (final R0[WS(rs, 1)] / R1[WS(rs, 4)] stores). */
Chris@42 53 E Tf, Tq, Tt, Tu;
Chris@42 54 {
Chris@42 55 E T1, Td, Th, Te, Tg, T2, Ts, TK, TB, TT, Tj, T6, T3, T4, T5;
Chris@42 56 E Tr;
/* Input loads, interleaved with the first arithmetic steps on the Ci values. */
Chris@42 57 T1 = Cr[0];
Chris@42 58 Td = Ci[WS(csi, 3)];
Chris@42 59 Th = Ci[WS(csi, 5)];
Chris@42 60 Te = Ci[WS(csi, 2)];
Chris@42 61 Tf = Ci[WS(csi, 4)];
Chris@42 62 Tg = Ci[WS(csi, 1)];
Chris@42 63 Tr = FMA(KP521108558, Td, Th);
Chris@42 64 T2 = Cr[WS(csr, 1)];
Chris@42 65 {
Chris@42 66 E TJ, TA, TS, Ti;
Chris@42 67 TJ = FMA(KP521108558, Tf, Td);
Chris@42 68 TA = FNMS(KP521108558, Te, Tf);
Chris@42 69 TS = FMS(KP521108558, Tg, Te);
Chris@42 70 Ti = FMA(KP521108558, Th, Tg);
Chris@42 71 Ts = FNMS(KP715370323, Tr, Te);
Chris@42 72 TK = FMA(KP715370323, TJ, Tg);
Chris@42 73 TB = FMA(KP715370323, TA, Th);
Chris@42 74 TT = FMA(KP715370323, TS, Td);
Chris@42 75 Tj = FMA(KP715370323, Ti, Tf);
Chris@42 76 T6 = Cr[WS(csr, 5)];
Chris@42 77 }
Chris@42 78 T3 = Cr[WS(csr, 2)];
Chris@42 79 T4 = Cr[WS(csr, 3)];
Chris@42 80 T5 = Cr[WS(csr, 4)];
Chris@42 81 {
Chris@42 82 E TG, Tx, To, Tl, Tb, TU, TQ, TP, Ta;
Chris@42 83 {
Chris@42 84 E Tk, TE, Tv, T8;
Chris@42 85 Tk = FMA(KP830830026, Tj, Te);
Chris@42 86 TE = FNMS(KP342584725, T3, T6);
Chris@42 87 Tv = FNMS(KP342584725, T2, T4);
Chris@42 88 T8 = FNMS(KP342584725, T4, T3);
Chris@42 89 {
Chris@42 90 E T7, Tm, TN, TF;
/* T7 is the plain sum of the five Cr inputs other than Cr[0]. */
Chris@42 91 T7 = T2 + T3 + T4 + T5 + T6;
Chris@42 92 Tm = FNMS(KP342584725, T5, T2);
Chris@42 93 TN = FNMS(KP342584725, T6, T5);
Chris@42 94 TF = FNMS(KP634356270, TE, T2);
Chris@42 95 {
Chris@42 96 E Tw, T9, Tn, TO;
Chris@42 97 Tw = FNMS(KP634356270, Tv, T6);
Chris@42 98 T9 = FNMS(KP634356270, T8, T5);
/* First output: combines T1 (= Cr[0]) with T7 and the constant 2
 * (T1 + 2*T7, assuming FMA(a, b, c) means a*b + c -- confirm). */
Chris@42 99 R0[0] = FMA(KP2_000000000, T7, T1);
Chris@42 100 Tn = FNMS(KP634356270, Tm, T3);
Chris@42 101 TO = FNMS(KP634356270, TN, T4);
Chris@42 102 TG = FNMS(KP778434453, TF, T4);
Chris@42 103 Tx = FNMS(KP778434453, Tw, T5);
Chris@42 104 Ta = FNMS(KP778434453, T9, T2);
Chris@42 105 To = FNMS(KP778434453, Tn, T6);
Chris@42 106 TP = FNMS(KP778434453, TO, T3);
Chris@42 107 Tl = FMA(KP918985947, Tk, Td);
Chris@42 108 }
Chris@42 109 }
Chris@42 110 }
Chris@42 111 Tb = FNMS(KP876768831, Ta, T6);
Chris@42 112 TU = FNMS(KP830830026, TT, Tf);
Chris@42 113 TQ = FNMS(KP876768831, TP, T2);
Chris@42 114 {
Chris@42 115 E TI, TL, Ty, TC;
Chris@42 116 {
Chris@42 117 E Tc, TV, TR, TH;
Chris@42 118 TH = FNMS(KP876768831, TG, T5);
Chris@42 119 Tc = FNMS(KP1_918985947, Tb, T1);
Chris@42 120 TV = FNMS(KP918985947, TU, Th);
Chris@42 121 TR = FNMS(KP1_918985947, TQ, T1);
Chris@42 122 TI = FNMS(KP1_918985947, TH, T1);
/* The remaining ten outputs are stored in pairs: each pair applies FMA and
 * FNMS with KP1_979642883 to the same two temporaries. */
Chris@42 123 R0[WS(rs, 5)] = FMA(KP1_979642883, Tl, Tc);
Chris@42 124 R1[0] = FNMS(KP1_979642883, Tl, Tc);
Chris@42 125 R0[WS(rs, 3)] = FMA(KP1_979642883, TV, TR);
Chris@42 126 R1[WS(rs, 2)] = FNMS(KP1_979642883, TV, TR);
Chris@42 127 TL = FNMS(KP830830026, TK, Th);
Chris@42 128 }
Chris@42 129 Ty = FNMS(KP876768831, Tx, T3);
Chris@42 130 TC = FNMS(KP830830026, TB, Td);
Chris@42 131 {
Chris@42 132 E TM, Tz, TD, Tp;
Chris@42 133 Tp = FNMS(KP876768831, To, T4);
Chris@42 134 TM = FMA(KP918985947, TL, Te);
Chris@42 135 Tz = FNMS(KP1_918985947, Ty, T1);
Chris@42 136 TD = FNMS(KP918985947, TC, Tg);
Chris@42 137 Tq = FNMS(KP1_918985947, Tp, T1);
Chris@42 138 R0[WS(rs, 2)] = FMA(KP1_979642883, TM, TI);
Chris@42 139 R1[WS(rs, 3)] = FNMS(KP1_979642883, TM, TI);
Chris@42 140 R0[WS(rs, 4)] = FMA(KP1_979642883, TD, Tz);
Chris@42 141 R1[WS(rs, 1)] = FNMS(KP1_979642883, TD, Tz);
Chris@42 142 Tt = FMA(KP830830026, Ts, Tg);
Chris@42 143 }
Chris@42 144 }
Chris@42 145 }
Chris@42 146 }
/* Last output pair, built from the loop-scope temporaries Tf, Tq and Tt. */
Chris@42 147 Tu = FNMS(KP918985947, Tt, Tf);
Chris@42 148 R0[WS(rs, 1)] = FMA(KP1_979642883, Tu, Tq);
Chris@42 149 R1[WS(rs, 4)] = FNMS(KP1_979642883, Tu, Tq);
Chris@42 150 }
Chris@42 151 }
Chris@42 152 }
Chris@42 153
/*
 * Descriptor handed to X(kr2c_register) together with r2cb_11 (HAVE_FMA
 * variant). It carries the number 11 (matching "-n 11" in the generator
 * command), the codelet name, and {4, 0, 56, 0}, whose first three values
 * equal the operation counts quoted in the comment above the function
 * (4 additions, 0 multiplications, 56 fused multiply/add).
 * NOTE(review): the kr2c_desc field layout, the meaning of the fourth
 * count and GENUS are defined in project headers not visible here.
 */
Chris@42 154 static const kr2c_desc desc = { 11, "r2cb_11", {4, 0, 56, 0}, &GENUS };
Chris@42 155
/*
 * Registration entry point (HAVE_FMA variant): passes the planner p, the
 * r2cb_11 function and its descriptor to X(kr2c_register).
 * NOTE(review): X(...) is a project macro applied to both identifiers,
 * presumably a name-mangling prefix -- its definition is not visible here.
 */
Chris@42 156 void X(codelet_r2cb_11) (planner *p) {
Chris@42 157 X(kr2c_register) (p, r2cb_11, &desc);
Chris@42 158 }
Chris@42 159
Chris@42 160 #else /* HAVE_FMA */
Chris@42 161
Chris@42 162 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 11 -name r2cb_11 -include r2cb.h */
Chris@42 163
Chris@42 164 /*
Chris@42 165 * This function contains 60 FP additions, 51 FP multiplications,
Chris@42 166 * (or, 19 additions, 10 multiplications, 41 fused multiply/add),
Chris@42 167 * 33 stack variables, 11 constants, and 22 memory accesses
Chris@42 168 */
Chris@42 169 #include "r2cb.h"
Chris@42 170
/*
 * r2cb_11 (non-FMA variant, compiled when HAVE_FMA is not defined):
 * machine-generated size-11 codelet (see the "Generated by" line above:
 * -sign 1 -n 11 -name r2cb_11). Same signature and the same set of
 * memory locations read and written as the HAVE_FMA variant.
 *
 * Each loop iteration reads Cr[0], Cr[WS(csr, 1..5)] and Ci[WS(csi, 1..5)]
 * (Ci[0] is never read) and writes R0[0], R0[WS(rs, 1..5)], R1[0] and
 * R1[WS(rs, 1..4)].
 *
 * Parameters:
 *   R0, R1   - output arrays, indexed through stride rs
 *   Cr, Ci   - input arrays, indexed through strides csr and csi
 *   v        - number of loop iterations; nothing is done when v <= 0
 *   ivs      - amount added to Cr and Ci after every iteration
 *   ovs      - amount added to R0 and R1 after every iteration
 *
 * NOTE(review): presumably Cr/Ci hold the real/imaginary parts of a
 * half-complex spectrum and R0/R1 receive the even/odd-indexed real
 * outputs -- confirm against r2cb.h / codelet-rdft.h, which are not
 * visible here. R, E, INT, stride, WS, DK, FMA, FNMA, FNMS and
 * MAKE_VOLATILE_STRIDE all come from those project headers.
 */
Chris@42 171 static void r2cb_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 172 {
/* Numeric constants used by the arithmetic below (declared via the project's DK macro). */
Chris@42 173 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 174 DK(KP1_918985947, +1.918985947228994779780736114132655398124909697);
Chris@42 175 DK(KP1_309721467, +1.309721467890570128113850144932587106367582399);
Chris@42 176 DK(KP284629676, +0.284629676546570280887585337232739337582102722);
Chris@42 177 DK(KP830830026, +0.830830026003772851058548298459246407048009821);
Chris@42 178 DK(KP1_682507065, +1.682507065662362337723623297838735435026584997);
Chris@42 179 DK(KP563465113, +0.563465113682859395422835830693233798071555798);
Chris@42 180 DK(KP1_511499148, +1.511499148708516567548071687944688840359434890);
Chris@42 181 DK(KP1_979642883, +1.979642883761865464752184075553437574753038744);
Chris@42 182 DK(KP1_819263990, +1.819263990709036742823430766158056920120482102);
Chris@42 183 DK(KP1_081281634, +1.081281634911195164215271908637383390863541216);
Chris@42 184 {
Chris@42 185 INT i;
/* i counts down from v; the output pointers advance by ovs and the input
 * pointers by ivs after each pass. MAKE_VOLATILE_STRIDE is an opaque project
 * macro whose effect is not visible in this file. */
Chris@42 186 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) {
Chris@42 187 E Td, Tl, Tf, Th, Tj, T1, T2, T6, T5, T4, T3, T7, Tk, Te, Tg;
Chris@42 188 E Ti;
Chris@42 189 {
Chris@42 190 E T8, Tc, T9, Ta, Tb;
/* Load the five Ci inputs, then form five weighted combinations of them
 * (Td, Tl, Tf, Th, Tj); the raw Ci values are not needed afterwards. */
Chris@42 191 T8 = Ci[WS(csi, 2)];
Chris@42 192 Tc = Ci[WS(csi, 1)];
Chris@42 193 T9 = Ci[WS(csi, 4)];
Chris@42 194 Ta = Ci[WS(csi, 5)];
Chris@42 195 Tb = Ci[WS(csi, 3)];
Chris@42 196 Td = FMA(KP1_081281634, T8, KP1_819263990 * T9) + FNMA(KP1_979642883, Ta, KP1_511499148 * Tb) - (KP563465113 * Tc);
Chris@42 197 Tl = FMA(KP1_979642883, T8, KP1_819263990 * Ta) + FNMA(KP563465113, T9, KP1_081281634 * Tb) - (KP1_511499148 * Tc);
Chris@42 198 Tf = FMA(KP563465113, T8, KP1_819263990 * Tb) + FNMA(KP1_511499148, Ta, KP1_081281634 * T9) - (KP1_979642883 * Tc);
Chris@42 199 Th = FMA(KP1_081281634, Tc, KP1_819263990 * T8) + FMA(KP1_979642883, Tb, KP1_511499148 * T9) + (KP563465113 * Ta);
Chris@42 200 Tj = FMA(KP563465113, Tb, KP1_979642883 * T9) + FNMS(KP1_511499148, T8, KP1_081281634 * Ta) - (KP1_819263990 * Tc);
Chris@42 201 }
/* Load the six Cr inputs, then form five weighted combinations
 * (T7, Tk, Te, Tg, Ti); every one of them takes T1 (= Cr[0]) as an operand. */
Chris@42 202 T1 = Cr[0];
Chris@42 203 T2 = Cr[WS(csr, 1)];
Chris@42 204 T6 = Cr[WS(csr, 5)];
Chris@42 205 T5 = Cr[WS(csr, 4)];
Chris@42 206 T4 = Cr[WS(csr, 3)];
Chris@42 207 T3 = Cr[WS(csr, 2)];
Chris@42 208 T7 = FMA(KP1_682507065, T3, T1) + FNMS(KP284629676, T6, KP830830026 * T5) + FNMA(KP1_309721467, T4, KP1_918985947 * T2);
Chris@42 209 Tk = FMA(KP1_682507065, T4, T1) + FNMS(KP1_918985947, T5, KP830830026 * T6) + FNMA(KP284629676, T3, KP1_309721467 * T2);
Chris@42 210 Te = FMA(KP830830026, T4, T1) + FNMS(KP1_309721467, T6, KP1_682507065 * T5) + FNMA(KP1_918985947, T3, KP284629676 * T2);
Chris@42 211 Tg = FMA(KP1_682507065, T2, T1) + FNMS(KP1_918985947, T6, KP830830026 * T3) + FNMA(KP1_309721467, T5, KP284629676 * T4);
Chris@42 212 Ti = FMA(KP830830026, T2, T1) + FNMS(KP284629676, T5, KP1_682507065 * T6) + FNMA(KP1_918985947, T4, KP1_309721467 * T3);
/* Ten outputs stored as sum/difference pairs of one Cr-combination and one
 * Ci-combination: (T7, Td), (Te, Tf), (Tk, Tl), (Ti, Tj), (Tg, Th). */
Chris@42 213 R0[WS(rs, 3)] = T7 - Td;
Chris@42 214 R0[WS(rs, 4)] = Te - Tf;
Chris@42 215 R0[WS(rs, 2)] = Tk + Tl;
Chris@42 216 R1[WS(rs, 2)] = T7 + Td;
Chris@42 217 R1[WS(rs, 3)] = Tk - Tl;
Chris@42 218 R0[WS(rs, 1)] = Ti + Tj;
Chris@42 219 R1[WS(rs, 1)] = Te + Tf;
Chris@42 220 R0[WS(rs, 5)] = Tg + Th;
Chris@42 221 R1[0] = Tg - Th;
Chris@42 222 R1[WS(rs, 4)] = Ti - Tj;
/* Eleventh output: combines T1 with the sum of the other five Cr inputs and
 * the constant 2 (T1 + 2*sum, assuming FMA(a, b, c) means a*b + c -- confirm). */
Chris@42 223 R0[0] = FMA(KP2_000000000, T2 + T3 + T4 + T5 + T6, T1);
Chris@42 224 }
Chris@42 225 }
Chris@42 226 }
Chris@42 227
/*
 * Descriptor handed to X(kr2c_register) together with r2cb_11 (non-FMA
 * variant). It carries the number 11 (matching "-n 11" in the generator
 * command), the codelet name, and {19, 10, 41, 0}, whose first three values
 * equal the operation counts quoted in the comment above the function
 * (19 additions, 10 multiplications, 41 fused multiply/add).
 * NOTE(review): the kr2c_desc field layout, the meaning of the fourth
 * count and GENUS are defined in project headers not visible here.
 */
Chris@42 228 static const kr2c_desc desc = { 11, "r2cb_11", {19, 10, 41, 0}, &GENUS };
Chris@42 229
/*
 * Registration entry point (non-FMA variant): passes the planner p, the
 * r2cb_11 function and its descriptor to X(kr2c_register).
 * NOTE(review): X(...) is a project macro applied to both identifiers,
 * presumably a name-mangling prefix -- its definition is not visible here.
 */
Chris@42 230 void X(codelet_r2cb_11) (planner *p) {
Chris@42 231 X(kr2c_register) (p, r2cb_11, &desc);
Chris@42 232 }
Chris@42 233
Chris@42 234 #endif /* HAVE_FMA */