annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cb_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:28 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cb_15 -include rdft/scalar/r2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 64 FP additions, 43 FP multiplications,
Chris@82 32 * (or, 21 additions, 0 multiplications, 43 fused multiply/add),
Chris@82 33 * 46 stack variables, 9 constants, and 30 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cb.h"
Chris@82 36
/*
 * r2cb_15 -- FMA-oriented variant, compiled only when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Machine-generated codelet (the genfft command line in the comment above
 * shows "-sign 1 -n 15"); the body is straight-line arithmetic and should
 * not be edited by hand.
 *
 * Per loop iteration it reads 15 values:
 *   Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..7)]   (Ci[0] is never read)
 * and writes 15 values:
 *   R0[0], R0[WS(rs, 1..7)], R1[0] and R1[WS(rs, 1..6)].
 * All loads are performed before the first store.
 *
 * NOTE(review): presumably this is the size-15 backward (complex-to-real)
 * transform with R0/R1 receiving the even/odd real output samples -- confirm
 * against rdft/codelet-rdft.h.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed via WS(rs, k)
 *   Cr, Ci        input arrays, indexed via WS(csr, k) and WS(csi, k)
 *   rs, csr, csi  strides handed to WS() to form element offsets
 *   v             number of loop iterations to execute
 *   ivs           amount added to Cr and Ci after each iteration
 *   ovs           amount added to R0 and R1 after each iteration
 */
Chris@82 37 static void r2cb_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
Chris@82 40 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* 2*sin(2*pi/5) */
Chris@82 43 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* sqrt(5)/2 */
Chris@82 44 DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* (sqrt(5)-1)/2 */
Chris@82 45 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 46 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* sqrt(3) */
Chris@82 47 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 48 {
Chris@82 49 INT i;
/* One pass per i, counting down from v; pointers advance by ovs (outputs) and ivs (inputs). */
Chris@82 50 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
Chris@82 51 E T3, Tt, Th, TC, TY, TZ, TD, TH, TI, Tm, Tu, Tr, Tv, T8, Td;
Chris@82 52 E Te;
/* Combine Cr[0], Cr[5*csr] and Ci[5*csi] into T3, Tt, Th. */
Chris@82 53 {
Chris@82 54 E Tg, T1, T2, Tf;
Chris@82 55 Tg = Ci[WS(csi, 5)];
Chris@82 56 T1 = Cr[0];
Chris@82 57 T2 = Cr[WS(csr, 5)];
Chris@82 58 Tf = T1 - T2;
Chris@82 59 T3 = FMA(KP2_000000000, T2, T1);
Chris@82 60 Tt = FNMS(KP1_732050807, Tg, Tf);
Chris@82 61 Th = FMA(KP1_732050807, Tg, Tf);
Chris@82 62 }
/* Load the remaining inputs (indices 1, 2, 3, 4, 6, 7) and form the shared intermediates. */
Chris@82 63 {
Chris@82 64 E T4, TA, T9, TF, T5, T6, T7, Ta, Tb, Tc, Tq, TG, Tl, TB, Ti;
Chris@82 65 E Tn;
Chris@82 66 T4 = Cr[WS(csr, 3)];
Chris@82 67 TA = Ci[WS(csi, 3)];
Chris@82 68 T9 = Cr[WS(csr, 6)];
Chris@82 69 TF = Ci[WS(csi, 6)];
Chris@82 70 T5 = Cr[WS(csr, 7)];
Chris@82 71 T6 = Cr[WS(csr, 2)];
Chris@82 72 T7 = T5 + T6;
Chris@82 73 Ta = Cr[WS(csr, 4)];
Chris@82 74 Tb = Cr[WS(csr, 1)];
Chris@82 75 Tc = Ta + Tb;
Chris@82 76 {
Chris@82 77 E To, Tp, Tj, Tk;
Chris@82 78 To = Ci[WS(csi, 4)];
Chris@82 79 Tp = Ci[WS(csi, 1)];
Chris@82 80 Tq = To + Tp;
Chris@82 81 TG = Tp - To;
Chris@82 82 Tj = Ci[WS(csi, 7)];
Chris@82 83 Tk = Ci[WS(csi, 2)];
Chris@82 84 Tl = Tj - Tk;
Chris@82 85 TB = Tj + Tk;
Chris@82 86 }
Chris@82 87 TC = FMA(KP500000000, TB, TA);
Chris@82 88 TY = TG + TF;
Chris@82 89 TZ = TA - TB;
Chris@82 90 TD = T5 - T6;
Chris@82 91 TH = FNMS(KP500000000, TG, TF);
Chris@82 92 TI = Ta - Tb;
Chris@82 93 Ti = FNMS(KP2_000000000, T4, T7);
Chris@82 94 Tm = FMA(KP1_732050807, Tl, Ti);
Chris@82 95 Tu = FNMS(KP1_732050807, Tl, Ti);
Chris@82 96 Tn = FNMS(KP2_000000000, T9, Tc);
Chris@82 97 Tr = FMA(KP1_732050807, Tq, Tn);
Chris@82 98 Tv = FNMS(KP1_732050807, Tq, Tn);
Chris@82 99 T8 = T4 + T7;
Chris@82 100 Td = T9 + Tc;
Chris@82 101 Te = T8 + Td;
Chris@82 102 }
/* First store; every input has already been loaded above. */
Chris@82 103 R0[0] = FMA(KP2_000000000, Te, T3);
/* Outputs R1[1], R1[4], R0[6], R0[3] (in units of rs), built from T3, Te, T8, Td, TY, TZ. */
Chris@82 104 {
Chris@82 105 E T10, T12, TX, T11, TV, TW;
Chris@82 106 T10 = FNMS(KP618033988, TZ, TY);
Chris@82 107 T12 = FMA(KP618033988, TY, TZ);
Chris@82 108 TV = FNMS(KP500000000, Te, T3);
Chris@82 109 TW = T8 - Td;
Chris@82 110 TX = FNMS(KP1_118033988, TW, TV);
Chris@82 111 T11 = FMA(KP1_118033988, TW, TV);
Chris@82 112 R1[WS(rs, 1)] = FNMS(KP1_902113032, T10, TX);
Chris@82 113 R1[WS(rs, 4)] = FMA(KP1_902113032, T12, T11);
Chris@82 114 R0[WS(rs, 6)] = FMA(KP1_902113032, T10, TX);
Chris@82 115 R0[WS(rs, 3)] = FNMS(KP1_902113032, T12, T11);
Chris@82 116 }
/* Outputs R1[2], R1[5], R0[7], R0[4], R0[1], built from Th, Tm, Tr, TC, TD, TH, TI. */
Chris@82 117 {
Chris@82 118 E TO, Ts, TN, TS, TU, TQ, TR, TT, TP;
Chris@82 119 TO = Tr - Tm;
Chris@82 120 Ts = Tm + Tr;
Chris@82 121 TN = FMA(KP250000000, Ts, Th);
Chris@82 122 TQ = FNMS(KP866025403, TI, TH);
Chris@82 123 TR = FNMS(KP866025403, TD, TC);
Chris@82 124 TS = FNMS(KP618033988, TR, TQ);
Chris@82 125 TU = FMA(KP618033988, TQ, TR);
Chris@82 126 R1[WS(rs, 2)] = Th - Ts;
Chris@82 127 TT = FMA(KP559016994, TO, TN);
Chris@82 128 R1[WS(rs, 5)] = FNMS(KP1_902113032, TU, TT);
Chris@82 129 R0[WS(rs, 7)] = FMA(KP1_902113032, TU, TT);
Chris@82 130 TP = FNMS(KP559016994, TO, TN);
Chris@82 131 R0[WS(rs, 4)] = FNMS(KP1_902113032, TS, TP);
Chris@82 132 R0[WS(rs, 1)] = FMA(KP1_902113032, TS, TP);
Chris@82 133 }
/* Outputs R0[5], R1[6], R1[3], R1[0], R0[2], built from Tt, Tu, Tv, TC, TD, TH, TI. */
Chris@82 134 {
Chris@82 135 E Ty, Tw, Tx, TK, TM, TE, TJ, TL, Tz;
Chris@82 136 Ty = Tv - Tu;
Chris@82 137 Tw = Tu + Tv;
Chris@82 138 Tx = FMA(KP250000000, Tw, Tt);
Chris@82 139 TE = FMA(KP866025403, TD, TC);
Chris@82 140 TJ = FMA(KP866025403, TI, TH);
Chris@82 141 TK = FMA(KP618033988, TJ, TE);
Chris@82 142 TM = FNMS(KP618033988, TE, TJ);
Chris@82 143 R0[WS(rs, 5)] = Tt - Tw;
Chris@82 144 TL = FNMS(KP559016994, Ty, Tx);
Chris@82 145 R1[WS(rs, 6)] = FNMS(KP1_902113032, TM, TL);
Chris@82 146 R1[WS(rs, 3)] = FMA(KP1_902113032, TM, TL);
Chris@82 147 Tz = FMA(KP559016994, Ty, Tx);
Chris@82 148 R1[0] = FNMS(KP1_902113032, TK, Tz);
Chris@82 149 R0[WS(rs, 2)] = FMA(KP1_902113032, TK, Tz);
Chris@82 150 }
Chris@82 151 }
Chris@82 152 }
Chris@82 153 }
Chris@82 154
/*
 * Descriptor for the FMA variant of r2cb_15. The brace-enclosed counts
 * {21, 0, 43, 0} match the generated header comment for this variant:
 * 21 additions, 0 multiplications, 43 fused multiply/adds.
 * NOTE(review): the leading 15 is presumably the transform size (genfft was
 * run with "-n 15"); the meaning of the fourth count and of GENUS is defined
 * outside this file -- confirm in the kr2c_desc declaration.
 */
Chris@82 155 static const kr2c_desc desc = { 15, "r2cb_15", {21, 0, 43, 0}, &GENUS };
Chris@82 156
/*
 * Registration entry point (FMA variant): hands the r2cb_15 function and its
 * descriptor to the planner p via X(kr2c_register).
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * confirm in the project headers.
 */
Chris@82 157 void X(codelet_r2cb_15) (planner *p) {
Chris@82 158 X(kr2c_register) (p, r2cb_15, &desc);
Chris@82 159 }
Chris@82 160
Chris@82 161 #else
Chris@82 162
Chris@82 163 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cb_15 -include rdft/scalar/r2cb.h */
Chris@82 164
Chris@82 165 /*
Chris@82 166 * This function contains 64 FP additions, 31 FP multiplications,
Chris@82 167 * (or, 47 additions, 14 multiplications, 17 fused multiply/add),
Chris@82 168 * 44 stack variables, 7 constants, and 30 memory accesses
Chris@82 169 */
Chris@82 170 #include "rdft/scalar/r2cb.h"
Chris@82 171
/*
 * r2cb_15 -- non-FMA variant, compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined (the #else branch).
 *
 * Machine-generated codelet (the genfft command line in the comment above
 * shows "-sign 1 -n 15" without "-fma"); the body is straight-line
 * arithmetic and should not be edited by hand.
 *
 * Per loop iteration it reads 15 values:
 *   Cr[0], Cr[WS(csr, 1..7)] and Ci[WS(csi, 1..7)]   (Ci[0] is never read)
 * and writes 15 values:
 *   R0[0], R0[WS(rs, 1..7)], R1[0] and R1[WS(rs, 1..6)].
 * All loads are performed before the first store.
 *
 * NOTE(review): presumably this is the size-15 backward (complex-to-real)
 * transform with R0/R1 receiving the even/odd real output samples -- confirm
 * against rdft/codelet-rdft.h.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed via WS(rs, k)
 *   Cr, Ci        input arrays, indexed via WS(csr, k) and WS(csi, k)
 *   rs, csr, csi  strides handed to WS() to form element offsets
 *   v             number of loop iterations to execute
 *   ivs           amount added to Cr and Ci after each iteration
 *   ovs           amount added to R0 and R1 after each iteration
 */
Chris@82 172 static void r2cb_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 173 {
Chris@82 174 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* sqrt(5)/2 */
Chris@82 175 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* 2*sin(2*pi/5) */
Chris@82 176 DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); /* 2*sin(pi/5) */
Chris@82 177 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 178 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
Chris@82 179 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 180 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* sqrt(3) */
Chris@82 181 {
Chris@82 182 INT i;
/* One pass per i, counting down from v; pointers advance by ovs (outputs) and ivs (inputs). */
Chris@82 183 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
Chris@82 184 E T3, Tu, Ti, TB, TZ, T10, TE, TG, TJ, Tn, Tv, Ts, Tw, T8, Td;
Chris@82 185 E Te;
/* Combine Cr[0], Cr[5*csr] and Ci[5*csi] into T3, Tu, Ti. */
Chris@82 186 {
Chris@82 187 E Th, T1, T2, Tf, Tg;
Chris@82 188 Tg = Ci[WS(csi, 5)];
Chris@82 189 Th = KP1_732050807 * Tg;
Chris@82 190 T1 = Cr[0];
Chris@82 191 T2 = Cr[WS(csr, 5)];
Chris@82 192 Tf = T1 - T2;
Chris@82 193 T3 = FMA(KP2_000000000, T2, T1);
Chris@82 194 Tu = Tf - Th;
Chris@82 195 Ti = Tf + Th;
Chris@82 196 }
/* Load the remaining inputs (indices 1, 2, 3, 4, 6, 7) and form the shared intermediates. */
Chris@82 197 {
Chris@82 198 E T4, TD, T9, TI, T5, T6, T7, Ta, Tb, Tc, Tr, TH, Tm, TC, Tj;
Chris@82 199 E To;
Chris@82 200 T4 = Cr[WS(csr, 3)];
Chris@82 201 TD = Ci[WS(csi, 3)];
Chris@82 202 T9 = Cr[WS(csr, 6)];
Chris@82 203 TI = Ci[WS(csi, 6)];
Chris@82 204 T5 = Cr[WS(csr, 7)];
Chris@82 205 T6 = Cr[WS(csr, 2)];
Chris@82 206 T7 = T5 + T6;
Chris@82 207 Ta = Cr[WS(csr, 4)];
Chris@82 208 Tb = Cr[WS(csr, 1)];
Chris@82 209 Tc = Ta + Tb;
Chris@82 210 {
Chris@82 211 E Tp, Tq, Tk, Tl;
Chris@82 212 Tp = Ci[WS(csi, 4)];
Chris@82 213 Tq = Ci[WS(csi, 1)];
Chris@82 214 Tr = KP866025403 * (Tp + Tq);
Chris@82 215 TH = Tp - Tq;
Chris@82 216 Tk = Ci[WS(csi, 7)];
Chris@82 217 Tl = Ci[WS(csi, 2)];
Chris@82 218 Tm = KP866025403 * (Tk - Tl);
Chris@82 219 TC = Tk + Tl;
Chris@82 220 }
Chris@82 221 TB = KP866025403 * (T5 - T6);
Chris@82 222 TZ = TD - TC;
Chris@82 223 T10 = TI - TH;
Chris@82 224 TE = FMA(KP500000000, TC, TD);
Chris@82 225 TG = KP866025403 * (Ta - Tb);
Chris@82 226 TJ = FMA(KP500000000, TH, TI);
Chris@82 227 Tj = FNMS(KP500000000, T7, T4);
Chris@82 228 Tn = Tj - Tm;
Chris@82 229 Tv = Tj + Tm;
Chris@82 230 To = FNMS(KP500000000, Tc, T9);
Chris@82 231 Ts = To - Tr;
Chris@82 232 Tw = To + Tr;
Chris@82 233 T8 = T4 + T7;
Chris@82 234 Td = T9 + Tc;
Chris@82 235 Te = T8 + Td;
Chris@82 236 }
/* First store; every input has already been loaded above. */
Chris@82 237 R0[0] = FMA(KP2_000000000, Te, T3);
/* Outputs R0[6], R1[4], R1[1], R0[3] (in units of rs), built from T3, Te, T8, Td, TZ, T10. */
Chris@82 238 {
Chris@82 239 E T11, T13, TY, T12, TW, TX;
Chris@82 240 T11 = FNMS(KP1_902113032, T10, KP1_175570504 * TZ);
Chris@82 241 T13 = FMA(KP1_902113032, TZ, KP1_175570504 * T10);
Chris@82 242 TW = FNMS(KP500000000, Te, T3);
Chris@82 243 TX = KP1_118033988 * (T8 - Td);
Chris@82 244 TY = TW - TX;
Chris@82 245 T12 = TX + TW;
Chris@82 246 R0[WS(rs, 6)] = TY - T11;
Chris@82 247 R1[WS(rs, 4)] = T12 + T13;
Chris@82 248 R1[WS(rs, 1)] = TY + T11;
Chris@82 249 R0[WS(rs, 3)] = T12 - T13;
Chris@82 250 }
/* Outputs R1[2], R1[5], R0[7], R0[1], R0[4], built from Ti, Tn, Ts, TB, TE, TG, TJ. */
Chris@82 251 {
Chris@82 252 E TP, Tt, TO, TT, TV, TR, TS, TU, TQ;
Chris@82 253 TP = KP1_118033988 * (Tn - Ts);
Chris@82 254 Tt = Tn + Ts;
Chris@82 255 TO = FNMS(KP500000000, Tt, Ti);
Chris@82 256 TR = TE - TB;
Chris@82 257 TS = TJ - TG;
Chris@82 258 TT = FNMS(KP1_902113032, TS, KP1_175570504 * TR);
Chris@82 259 TV = FMA(KP1_902113032, TR, KP1_175570504 * TS);
Chris@82 260 R1[WS(rs, 2)] = FMA(KP2_000000000, Tt, Ti);
Chris@82 261 TU = TP + TO;
Chris@82 262 R1[WS(rs, 5)] = TU - TV;
Chris@82 263 R0[WS(rs, 7)] = TU + TV;
Chris@82 264 TQ = TO - TP;
Chris@82 265 R0[WS(rs, 1)] = TQ - TT;
Chris@82 266 R0[WS(rs, 4)] = TQ + TT;
Chris@82 267 }
/* Outputs R0[5], R1[0], R0[2], R1[3], R1[6], built from Tu, Tv, Tw, TB, TE, TG, TJ. */
Chris@82 268 {
Chris@82 269 E Tz, Tx, Ty, TL, TN, TF, TK, TM, TA;
Chris@82 270 Tz = KP1_118033988 * (Tv - Tw);
Chris@82 271 Tx = Tv + Tw;
Chris@82 272 Ty = FNMS(KP500000000, Tx, Tu);
Chris@82 273 TF = TB + TE;
Chris@82 274 TK = TG + TJ;
Chris@82 275 TL = FNMS(KP1_902113032, TK, KP1_175570504 * TF);
Chris@82 276 TN = FMA(KP1_902113032, TF, KP1_175570504 * TK);
Chris@82 277 R0[WS(rs, 5)] = FMA(KP2_000000000, Tx, Tu);
Chris@82 278 TM = Tz + Ty;
Chris@82 279 R1[0] = TM - TN;
Chris@82 280 R0[WS(rs, 2)] = TM + TN;
Chris@82 281 TA = Ty - Tz;
Chris@82 282 R1[WS(rs, 3)] = TA - TL;
Chris@82 283 R1[WS(rs, 6)] = TA + TL;
Chris@82 284 }
Chris@82 285 }
Chris@82 286 }
Chris@82 287 }
Chris@82 288
/*
 * Descriptor for the non-FMA variant of r2cb_15. The brace-enclosed counts
 * {47, 14, 17, 0} match the generated header comment for this variant:
 * 47 additions, 14 multiplications, 17 fused multiply/adds.
 * NOTE(review): the leading 15 is presumably the transform size (genfft was
 * run with "-n 15"); the meaning of the fourth count and of GENUS is defined
 * outside this file -- confirm in the kr2c_desc declaration.
 */
Chris@82 289 static const kr2c_desc desc = { 15, "r2cb_15", {47, 14, 17, 0}, &GENUS };
Chris@82 290
/*
 * Registration entry point (non-FMA variant): hands the r2cb_15 function and
 * its descriptor to the planner p via X(kr2c_register).
 * NOTE(review): X() is presumably the library's symbol-prefixing macro --
 * confirm in the project headers.
 */
Chris@82 291 void X(codelet_r2cb_15) (planner *p) {
Chris@82 292 X(kr2c_register) (p, r2cb_15, &desc);
Chris@82 293 }
Chris@82 294
Chris@82 295 #endif