annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:05 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cf_15 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 64 FP additions, 35 FP multiplications,
Chris@42 32 * (or, 36 additions, 7 multiplications, 28 fused multiply/add),
Chris@42 33 * 50 stack variables, 8 constants, and 30 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_15 (HAVE_FMA variant): fully unrolled size-15 kernel.
 *
 * Each loop iteration reads 15 real values -- R0[0] and R0[WS(rs, 1..7)],
 * plus R1[0] and R1[WS(rs, 1..6)] -- and stores 15 results: Cr[0] and
 * Cr[WS(csr, 1..7)], plus Ci[WS(csi, 1..7)].  Ci[0] is never written.
 *
 * Parameters:
 *   R0, R1    input arrays (read only here; 8 and 7 elements respectively).
 *             NOTE(review): presumably the even- and odd-indexed samples of
 *             one real input vector -- confirm against r2cf.h / the caller.
 *   Cr, Ci    output arrays (write only here).
 *             NOTE(review): presumably real and imaginary parts of the
 *             non-redundant half of the spectrum -- confirm.
 *   rs        stride handed to WS() when indexing R0 and R1.
 *   csr, csi  strides handed to WS() when indexing Cr and Ci.
 *   v         iteration count; the loop body is skipped when v <= 0.
 *   ivs       added to R0 and R1 after every iteration.
 *   ovs       added to Cr and Ci after every iteration.
 *
 * NOTE(review): DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined in
 * project headers that are not visible here.  FMA(a, b, c) is presumably
 * a*b + c and FNMS(a, b, c) presumably c - a*b -- verify before relying on it.
 */
Chris@42 37 static void r2cf_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/*
 * Numeric constants.  Judging by their literal values: 0.5, 0.25,
 * 0.8660... ~ sqrt(3)/2, 0.9510... ~ sin(2*pi/5), 0.5590... ~ sqrt(5)/4 and
 * 0.6180... ~ (sqrt(5)-1)/2.
 */
Chris@42 39 DK(KP910592997, +0.910592997310029334643087372129977886038870291);
Chris@42 40 DK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@42 41 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 42 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 43 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 44 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 45 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 46 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 47 {
Chris@42 48 INT i;
/* One pass per transform: v passes in total, stepping the four base pointers. */
Chris@42 49 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
/*
 * Tw, Tz, Tp and Ty are assigned inside the first inner block below and
 * consumed by the final block that produces Ci[1], Ci[2], Ci[4] and Ci[7],
 * hence their wider scope.
 */
Chris@42 50 E Tw, Tz, Tp, Ty;
Chris@42 51 {
Chris@42 52 E Ti, TF, TR, TN, TX, T11, TM, TS, Tl, TH, Tf, To, TT, TD, Tg;
Chris@42 53 E Th;
Chris@42 54 TD = R0[0];
Chris@42 55 Tg = R0[WS(rs, 5)];
Chris@42 56 Th = R1[WS(rs, 2)];
Chris@42 57 {
/* Remaining loads, interleaved with the first sums and differences. */
Chris@42 58 E Tj, Tq, Tt, Tm, T3, Tk, T4, Ta, Tr, Td, Tu, T5, TE;
Chris@42 59 Tj = R1[WS(rs, 1)];
Chris@42 60 Tq = R0[WS(rs, 3)];
Chris@42 61 Tt = R1[WS(rs, 4)];
Chris@42 62 TE = Th + Tg;
Chris@42 63 Ti = Tg - Th;
Chris@42 64 Tm = R0[WS(rs, 6)];
Chris@42 65 {
Chris@42 66 E T8, T9, T1, T2, Tb, Tc;
Chris@42 67 T1 = R0[WS(rs, 4)];
Chris@42 68 T2 = R1[WS(rs, 6)];
Chris@42 69 TF = FNMS(KP500000000, TE, TD);
Chris@42 70 TR = TD + TE;
Chris@42 71 T8 = R1[WS(rs, 5)];
Chris@42 72 T3 = T1 - T2;
Chris@42 73 Tk = T1 + T2;
Chris@42 74 T9 = R1[0];
Chris@42 75 Tb = R0[WS(rs, 7)];
Chris@42 76 Tc = R0[WS(rs, 2)];
Chris@42 77 T4 = R0[WS(rs, 1)];
Chris@42 78 Ta = T8 - T9;
Chris@42 79 Tr = T8 + T9;
Chris@42 80 Td = Tb - Tc;
Chris@42 81 Tu = Tb + Tc;
Chris@42 82 T5 = R1[WS(rs, 3)];
Chris@42 83 }
Chris@42 84 {
Chris@42 85 E Ts, Tv, Te, Tn, T7, T6, TV, TW;
Chris@42 86 TV = Tq + Tr;
Chris@42 87 Ts = FNMS(KP500000000, Tr, Tq);
Chris@42 88 Tv = FNMS(KP500000000, Tu, Tt);
Chris@42 89 TW = Tt + Tu;
Chris@42 90 Te = Ta + Td;
Chris@42 91 TN = Td - Ta;
Chris@42 92 Tn = T4 + T5;
Chris@42 93 T6 = T4 - T5;
Chris@42 94 TX = TV + TW;
Chris@42 95 T11 = TW - TV;
Chris@42 96 TM = T6 - T3;
Chris@42 97 T7 = T3 + T6;
Chris@42 98 TS = Tj + Tk;
Chris@42 99 Tl = FNMS(KP500000000, Tk, Tj);
Chris@42 100 TH = Ts + Tv;
Chris@42 101 Tw = Ts - Tv;
Chris@42 102 Tz = Te - T7;
Chris@42 103 Tf = T7 + Te;
Chris@42 104 To = FNMS(KP500000000, Tn, Tm);
Chris@42 105 TT = Tm + Tn;
Chris@42 106 }
Chris@42 107 }
Chris@42 108 {
/* Stores of Cr[0..7] and of Ci[3], Ci[5], Ci[6]. */
Chris@42 109 E TO, TQ, TU, T12, TK, TI, TG;
Chris@42 110 Ci[WS(csi, 5)] = KP866025403 * (Tf - Ti);
Chris@42 111 TG = Tl + To;
Chris@42 112 Tp = Tl - To;
Chris@42 113 TO = FMA(KP618033988, TN, TM);
Chris@42 114 TQ = FNMS(KP618033988, TM, TN);
Chris@42 115 TU = TS + TT;
Chris@42 116 T12 = TS - TT;
Chris@42 117 TK = TG - TH;
Chris@42 118 TI = TG + TH;
Chris@42 119 {
Chris@42 120 E T10, TY, TL, TP, TJ, TZ;
Chris@42 121 T10 = TU - TX;
Chris@42 122 TY = TU + TX;
Chris@42 123 Cr[WS(csr, 5)] = TF + TI;
Chris@42 124 TJ = FNMS(KP250000000, TI, TF);
Chris@42 125 Ci[WS(csi, 6)] = -(KP951056516 * (FNMS(KP618033988, T11, T12)));
Chris@42 126 Ci[WS(csi, 3)] = KP951056516 * (FMA(KP618033988, T12, T11));
Chris@42 127 TL = FMA(KP559016994, TK, TJ);
Chris@42 128 TP = FNMS(KP559016994, TK, TJ);
/* Cr[0] is the plain sum of all 15 inputs (TR and TY together cover every load). */
Chris@42 129 Cr[0] = TR + TY;
Chris@42 130 TZ = FNMS(KP250000000, TY, TR);
Chris@42 131 Cr[WS(csr, 4)] = FNMS(KP823639103, TO, TL);
Chris@42 132 Cr[WS(csr, 1)] = FMA(KP823639103, TO, TL);
Chris@42 133 Cr[WS(csr, 7)] = FNMS(KP823639103, TQ, TP);
Chris@42 134 Cr[WS(csr, 2)] = FMA(KP823639103, TQ, TP);
Chris@42 135 Cr[WS(csr, 6)] = FMA(KP559016994, T10, TZ);
Chris@42 136 Cr[WS(csr, 3)] = FNMS(KP559016994, T10, TZ);
Chris@42 137 Ty = FMA(KP250000000, Tf, Ti);
Chris@42 138 }
Chris@42 139 }
Chris@42 140 }
Chris@42 141 {
/* Remaining four Ci outputs, built from the values carried in Tp, Tw, Ty, Tz. */
Chris@42 142 E TB, Tx, TC, TA;
Chris@42 143 TB = FNMS(KP618033988, Tp, Tw);
Chris@42 144 Tx = FMA(KP618033988, Tw, Tp);
Chris@42 145 TC = FNMS(KP559016994, Tz, Ty);
Chris@42 146 TA = FMA(KP559016994, Tz, Ty);
Chris@42 147 Ci[WS(csi, 2)] = KP951056516 * (FNMS(KP910592997, TC, TB));
Chris@42 148 Ci[WS(csi, 7)] = KP951056516 * (FMA(KP910592997, TC, TB));
Chris@42 149 Ci[WS(csi, 4)] = KP951056516 * (FMA(KP910592997, TA, Tx));
Chris@42 150 Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP910592997, TA, Tx)));
Chris@42 151 }
Chris@42 152 }
Chris@42 153 }
Chris@42 154 }
Chris@42 155
/*
 * Descriptor handed to the registration call below: the number 15, the name
 * string "r2cf_15", and the triple 36/7/28 that matches the "36 additions,
 * 7 multiplications, 28 fused multiply/add" counts quoted in the generator
 * comment for this variant.
 * NOTE(review): the meaning of the trailing 0 and of GENUS is defined by
 * kr2c_desc / r2cf.h, which are not visible here -- confirm there.
 */
Chris@42 156 static const kr2c_desc desc = { 15, "r2cf_15", {36, 7, 28, 0}, &GENUS };
Chris@42 157
/*
 * Registration entry point (HAVE_FMA build): passes the r2cf_15 kernel and
 * its descriptor to kr2c_register for planner p.
 * NOTE(review): X() is a project macro, presumably adding the library's
 * symbol prefix -- confirm in the FFTW headers.
 */
Chris@42 158 void X(codelet_r2cf_15) (planner *p) {
Chris@42 159 X(kr2c_register) (p, r2cf_15, &desc);
Chris@42 160 }
Chris@42 161
Chris@42 162 #else /* HAVE_FMA */
Chris@42 163
Chris@42 164 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cf_15 -include r2cf.h */
Chris@42 165
Chris@42 166 /*
Chris@42 167 * This function contains 64 FP additions, 25 FP multiplications,
Chris@42 168 * (or, 50 additions, 11 multiplications, 14 fused multiply/add),
Chris@42 169 * 47 stack variables, 10 constants, and 30 memory accesses
Chris@42 170 */
Chris@42 171 #include "r2cf.h"
Chris@42 172
/*
 * r2cf_15 (non-FMA variant): fully unrolled size-15 kernel, compiled when
 * HAVE_FMA is not defined.
 *
 * Each loop iteration reads 15 real values -- R0[0] and R0[WS(rs, 1..7)],
 * plus R1[0] and R1[WS(rs, 1..6)] -- and stores 15 results: Cr[0] and
 * Cr[WS(csr, 1..7)], plus Ci[WS(csi, 1..7)].  Ci[0] is never written.
 *
 * Parameters:
 *   R0, R1    input arrays (read only here; 8 and 7 elements respectively).
 *             NOTE(review): presumably the even- and odd-indexed samples of
 *             one real input vector -- confirm against r2cf.h / the caller.
 *   Cr, Ci    output arrays (write only here).
 *             NOTE(review): presumably real and imaginary parts of the
 *             non-redundant half of the spectrum -- confirm.
 *   rs        stride handed to WS() when indexing R0 and R1.
 *   csr, csi  strides handed to WS() when indexing Cr and Ci.
 *   v         iteration count; the loop body is skipped when v <= 0.
 *   ivs       added to R0 and R1 after every iteration.
 *   ovs       added to Cr and Ci after every iteration.
 *
 * NOTE(review): DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined in
 * project headers that are not visible here.  FMA(a, b, c) is presumably
 * a*b + c and FNMS(a, b, c) presumably c - a*b -- verify before relying on it.
 */
Chris@42 173 static void r2cf_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 174 {
/*
 * Numeric constants.  Judging by their literal values: 0.5, 0.25,
 * 0.8660... ~ sqrt(3)/2, 0.9510... ~ sin(2*pi/5), 0.5877... ~ sin(pi/5),
 * 0.5590... ~ sqrt(5)/4, 0.2165... ~ sqrt(3)/8 and 0.4841... ~ sqrt(15)/8.
 */
Chris@42 175 DK(KP484122918, +0.484122918275927110647408174972799951354115213);
Chris@42 176 DK(KP216506350, +0.216506350946109661690930792688234045867850657);
Chris@42 177 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 178 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 179 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 180 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 181 DK(KP509036960, +0.509036960455127183450980863393907648510733164);
Chris@42 182 DK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@42 183 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 184 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 185 {
Chris@42 186 INT i;
/* One pass per transform: v passes in total, stepping the four base pointers. */
Chris@42 187 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
/* Intermediates shared between the load blocks and the store blocks below. */
Chris@42 188 E Ti, TR, TL, TD, TE, T7, Te, Tf, TV, TW, TX, Tv, Ty, TH, To;
Chris@42 189 E Tr, TG, TS, TT, TU;
Chris@42 190 {
/* First input triple: R0[0], R0[5], R1[2]. */
Chris@42 191 E TJ, Tg, Th, TK;
Chris@42 192 TJ = R0[0];
Chris@42 193 Tg = R0[WS(rs, 5)];
Chris@42 194 Th = R1[WS(rs, 2)];
Chris@42 195 TK = Th + Tg;
Chris@42 196 Ti = Tg - Th;
Chris@42 197 TR = TJ + TK;
Chris@42 198 TL = FNMS(KP500000000, TK, TJ);
Chris@42 199 }
Chris@42 200 {
/* Remaining twelve loads and the sums/differences derived from them. */
Chris@42 201 E Tm, Tt, Tw, Tp, T3, Tx, Ta, Tn, Td, Tq, T6, Tu;
Chris@42 202 Tm = R1[WS(rs, 1)];
Chris@42 203 Tt = R0[WS(rs, 3)];
Chris@42 204 Tw = R1[WS(rs, 4)];
Chris@42 205 Tp = R0[WS(rs, 6)];
Chris@42 206 {
Chris@42 207 E T1, T2, T8, T9;
Chris@42 208 T1 = R0[WS(rs, 7)];
Chris@42 209 T2 = R0[WS(rs, 2)];
Chris@42 210 T3 = T1 - T2;
Chris@42 211 Tx = T1 + T2;
Chris@42 212 T8 = R1[WS(rs, 6)];
Chris@42 213 T9 = R0[WS(rs, 4)];
Chris@42 214 Ta = T8 - T9;
Chris@42 215 Tn = T9 + T8;
Chris@42 216 }
Chris@42 217 {
Chris@42 218 E Tb, Tc, T4, T5;
Chris@42 219 Tb = R1[WS(rs, 3)];
Chris@42 220 Tc = R0[WS(rs, 1)];
Chris@42 221 Td = Tb - Tc;
Chris@42 222 Tq = Tc + Tb;
Chris@42 223 T4 = R1[0];
Chris@42 224 T5 = R1[WS(rs, 5)];
Chris@42 225 T6 = T4 - T5;
Chris@42 226 Tu = T5 + T4;
Chris@42 227 }
Chris@42 228 TD = Ta - Td;
Chris@42 229 TE = T6 + T3;
Chris@42 230 T7 = T3 - T6;
Chris@42 231 Te = Ta + Td;
Chris@42 232 Tf = T7 - Te;
Chris@42 233 TV = Tt + Tu;
Chris@42 234 TW = Tw + Tx;
Chris@42 235 TX = TV + TW;
Chris@42 236 Tv = FNMS(KP500000000, Tu, Tt);
Chris@42 237 Ty = FNMS(KP500000000, Tx, Tw);
Chris@42 238 TH = Tv + Ty;
Chris@42 239 To = FNMS(KP500000000, Tn, Tm);
Chris@42 240 Tr = FNMS(KP500000000, Tq, Tp);
Chris@42 241 TG = To + Tr;
Chris@42 242 TS = Tm + Tn;
Chris@42 243 TT = Tp + Tq;
Chris@42 244 TU = TS + TT;
Chris@42 245 }
Chris@42 246 Ci[WS(csi, 5)] = KP866025403 * (Tf - Ti);
Chris@42 247 {
/* Cr[1], Cr[2], Cr[4], Cr[5], Cr[7]. */
Chris@42 248 E TF, TP, TI, TM, TN, TQ, TO;
Chris@42 249 TF = FMA(KP823639103, TD, KP509036960 * TE);
Chris@42 250 TP = FNMS(KP509036960, TD, KP823639103 * TE);
Chris@42 251 TI = KP559016994 * (TG - TH);
Chris@42 252 TM = TG + TH;
Chris@42 253 TN = FNMS(KP250000000, TM, TL);
Chris@42 254 Cr[WS(csr, 5)] = TL + TM;
Chris@42 255 TQ = TN - TI;
Chris@42 256 Cr[WS(csr, 2)] = TP + TQ;
Chris@42 257 Cr[WS(csr, 7)] = TQ - TP;
Chris@42 258 TO = TI + TN;
Chris@42 259 Cr[WS(csr, 1)] = TF + TO;
Chris@42 260 Cr[WS(csr, 4)] = TO - TF;
Chris@42 261 }
Chris@42 262 {
/* Cr[0], Cr[3], Cr[6] and Ci[3], Ci[6], then the last four Ci outputs. */
Chris@42 263 E T11, T12, T10, TY, TZ;
Chris@42 264 T11 = TS - TT;
Chris@42 265 T12 = TW - TV;
Chris@42 266 Ci[WS(csi, 3)] = FMA(KP587785252, T11, KP951056516 * T12);
Chris@42 267 Ci[WS(csi, 6)] = FNMS(KP951056516, T11, KP587785252 * T12);
Chris@42 268 T10 = KP559016994 * (TU - TX);
Chris@42 269 TY = TU + TX;
Chris@42 270 TZ = FNMS(KP250000000, TY, TR);
Chris@42 271 Cr[WS(csr, 3)] = TZ - T10;
/* Cr[0] is the plain sum of all 15 inputs (TR and TY together cover every load). */
Chris@42 272 Cr[0] = TR + TY;
Chris@42 273 Cr[WS(csr, 6)] = T10 + TZ;
Chris@42 274 {
Chris@42 275 E Tl, TB, TA, TC;
Chris@42 276 {
Chris@42 277 E Tj, Tk, Ts, Tz;
Chris@42 278 Tj = FMA(KP866025403, Ti, KP216506350 * Tf);
Chris@42 279 Tk = KP484122918 * (Te + T7);
Chris@42 280 Tl = Tj + Tk;
Chris@42 281 TB = Tk - Tj;
Chris@42 282 Ts = To - Tr;
Chris@42 283 Tz = Tv - Ty;
Chris@42 284 TA = FMA(KP951056516, Ts, KP587785252 * Tz);
Chris@42 285 TC = FNMS(KP587785252, Ts, KP951056516 * Tz);
Chris@42 286 }
Chris@42 287 Ci[WS(csi, 1)] = Tl - TA;
Chris@42 288 Ci[WS(csi, 7)] = TC - TB;
Chris@42 289 Ci[WS(csi, 4)] = Tl + TA;
Chris@42 290 Ci[WS(csi, 2)] = TB + TC;
Chris@42 291 }
Chris@42 292 }
Chris@42 293 }
Chris@42 294 }
Chris@42 295 }
Chris@42 296
/*
 * Descriptor handed to the registration call below: the number 15, the name
 * string "r2cf_15", and the triple 50/11/14 that matches the "50 additions,
 * 11 multiplications, 14 fused multiply/add" counts quoted in the generator
 * comment for this variant.
 * NOTE(review): the meaning of the trailing 0 and of GENUS is defined by
 * kr2c_desc / r2cf.h, which are not visible here -- confirm there.
 */
Chris@42 297 static const kr2c_desc desc = { 15, "r2cf_15", {50, 11, 14, 0}, &GENUS };
Chris@42 298
/*
 * Registration entry point (non-FMA build): passes the r2cf_15 kernel and
 * its descriptor to kr2c_register for planner p.
 * NOTE(review): X() is a project macro, presumably adding the library's
 * symbol prefix -- confirm in the FFTW headers.
 */
Chris@42 299 void X(codelet_r2cf_15) (planner *p) {
Chris@42 300 X(kr2c_register) (p, r2cf_15, &desc);
Chris@42 301 }
Chris@42 302
Chris@42 303 #endif /* HAVE_FMA */