annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:26 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cf_15 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 64 FP additions, 35 FP multiplications,
Chris@82 32 * (or, 36 additions, 7 multiplications, 28 fused multiply/add),
Chris@82 33 * 45 stack variables, 8 constants, and 30 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
Chris@82 37 static void r2cf_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* Variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined. Per iteration it reads 15 values (8 from R0, 7 from R1) and writes Cr at 8 positions and Ci at 7 positions. */
Chris@82 38 { /* NOTE(review): presumably R0/R1 hold the even/odd-indexed real inputs and Cr/Ci the real/imaginary outputs of a size-15 real-to-complex DFT, as the name and generator command suggest -- confirm against the codelet headers. */
Chris@82 39 DK(KP910592997, +0.910592997310029334643087372129977886038870291);
Chris@82 40 DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@82 41 DK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 42 DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@82 43 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 44 DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5)-1)/2 */
Chris@82 45 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 46 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 47 {
Chris@82 48 INT i;
Chris@82 49 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { /* Runs v times; after each pass the input pointers advance by ivs and the output pointers by ovs. */
Chris@82 50 E Ti, TR, TF, TM, TN, T7, Te, Tf, TV, TW, TX, Ts, Tv, TH, Tl; /* Intermediates that outlive the nested blocks below. */
Chris@82 51 E To, TG, TS, TT, TU;
Chris@82 52 { /* Inputs R0[0], R0[5] and R1[2]: TR is their sum; Ti and TF are derived from the same inputs. FMA/FNMS are macros defined outside this file -- presumably a*b+c and c-a*b, TODO confirm. */
Chris@82 53 E TD, Tg, Th, TE;
Chris@82 54 TD = R0[0];
Chris@82 55 Tg = R0[WS(rs, 5)];
Chris@82 56 Th = R1[WS(rs, 2)];
Chris@82 57 TE = Th + Tg;
Chris@82 58 Ti = Tg - Th;
Chris@82 59 TR = TD + TE;
Chris@82 60 TF = FNMS(KP500000000, TE, TD);
Chris@82 61 }
Chris@82 62 { /* Loads the other twelve inputs and forms the sums and differences shared by the output expressions. */
Chris@82 63 E Tj, Tq, Tt, Tm, T3, Tk, Ta, Tr, Td, Tu, T6, Tn;
Chris@82 64 Tj = R1[WS(rs, 1)];
Chris@82 65 Tq = R0[WS(rs, 3)];
Chris@82 66 Tt = R1[WS(rs, 4)];
Chris@82 67 Tm = R0[WS(rs, 6)];
Chris@82 68 {
Chris@82 69 E T1, T2, T8, T9;
Chris@82 70 T1 = R0[WS(rs, 4)];
Chris@82 71 T2 = R1[WS(rs, 6)];
Chris@82 72 T3 = T1 - T2;
Chris@82 73 Tk = T1 + T2;
Chris@82 74 T8 = R1[WS(rs, 5)];
Chris@82 75 T9 = R1[0];
Chris@82 76 Ta = T8 - T9;
Chris@82 77 Tr = T8 + T9;
Chris@82 78 }
Chris@82 79 {
Chris@82 80 E Tb, Tc, T4, T5;
Chris@82 81 Tb = R0[WS(rs, 7)];
Chris@82 82 Tc = R0[WS(rs, 2)];
Chris@82 83 Td = Tb - Tc;
Chris@82 84 Tu = Tb + Tc;
Chris@82 85 T4 = R0[WS(rs, 1)];
Chris@82 86 T5 = R1[WS(rs, 3)];
Chris@82 87 T6 = T4 - T5;
Chris@82 88 Tn = T4 + T5;
Chris@82 89 }
Chris@82 90 TM = T6 - T3;
Chris@82 91 TN = Td - Ta;
Chris@82 92 T7 = T3 + T6;
Chris@82 93 Te = Ta + Td;
Chris@82 94 Tf = T7 + Te;
Chris@82 95 TV = Tq + Tr;
Chris@82 96 TW = Tt + Tu;
Chris@82 97 TX = TV + TW;
Chris@82 98 Ts = FNMS(KP500000000, Tr, Tq);
Chris@82 99 Tv = FNMS(KP500000000, Tu, Tt);
Chris@82 100 TH = Ts + Tv;
Chris@82 101 Tl = FNMS(KP500000000, Tk, Tj);
Chris@82 102 To = FNMS(KP500000000, Tn, Tm);
Chris@82 103 TG = Tl + To;
Chris@82 104 TS = Tj + Tk;
Chris@82 105 TT = Tm + Tn;
Chris@82 106 TU = TS + TT;
Chris@82 107 }
Chris@82 108 Ci[WS(csi, 5)] = KP866025403 * (Tf - Ti);
Chris@82 109 { /* Writes Cr at positions 5, 2, 7, 1 and 4. */
Chris@82 110 E TK, TQ, TO, TI, TJ, TP, TL;
Chris@82 111 TK = TG - TH;
Chris@82 112 TQ = FNMS(KP618033988, TM, TN);
Chris@82 113 TO = FMA(KP618033988, TN, TM);
Chris@82 114 TI = TG + TH;
Chris@82 115 TJ = FNMS(KP250000000, TI, TF);
Chris@82 116 Cr[WS(csr, 5)] = TF + TI;
Chris@82 117 TP = FNMS(KP559016994, TK, TJ);
Chris@82 118 Cr[WS(csr, 2)] = FMA(KP823639103, TQ, TP);
Chris@82 119 Cr[WS(csr, 7)] = FNMS(KP823639103, TQ, TP);
Chris@82 120 TL = FMA(KP559016994, TK, TJ);
Chris@82 121 Cr[WS(csr, 1)] = FMA(KP823639103, TO, TL);
Chris@82 122 Cr[WS(csr, 4)] = FNMS(KP823639103, TO, TL);
Chris@82 123 }
Chris@82 124 { /* Writes Ci at positions 3 and 6 and Cr at positions 3, 0 and 6; the nested block then writes Ci at positions 1, 7, 4 and 2. */
Chris@82 125 E T11, T12, T10, TY, TZ;
Chris@82 126 T11 = TW - TV;
Chris@82 127 T12 = TS - TT;
Chris@82 128 Ci[WS(csi, 3)] = KP951056516 * (FMA(KP618033988, T12, T11));
Chris@82 129 Ci[WS(csi, 6)] = -(KP951056516 * (FNMS(KP618033988, T11, T12)));
Chris@82 130 T10 = TU - TX;
Chris@82 131 TY = TU + TX;
Chris@82 132 TZ = FNMS(KP250000000, TY, TR);
Chris@82 133 Cr[WS(csr, 3)] = FNMS(KP559016994, T10, TZ);
Chris@82 134 Cr[0] = TR + TY; /* TR + TY expands to the plain sum of all 15 loaded inputs. */
Chris@82 135 Cr[WS(csr, 6)] = FMA(KP559016994, T10, TZ);
Chris@82 136 {
Chris@82 137 E Tx, TB, TA, TC;
Chris@82 138 {
Chris@82 139 E Tp, Tw, Ty, Tz;
Chris@82 140 Tp = Tl - To;
Chris@82 141 Tw = Ts - Tv;
Chris@82 142 Tx = FMA(KP618033988, Tw, Tp);
Chris@82 143 TB = FNMS(KP618033988, Tp, Tw);
Chris@82 144 Ty = FMA(KP250000000, Tf, Ti);
Chris@82 145 Tz = Te - T7;
Chris@82 146 TA = FMA(KP559016994, Tz, Ty);
Chris@82 147 TC = FNMS(KP559016994, Tz, Ty);
Chris@82 148 }
Chris@82 149 Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP910592997, TA, Tx)));
Chris@82 150 Ci[WS(csi, 7)] = KP951056516 * (FMA(KP910592997, TC, TB));
Chris@82 151 Ci[WS(csi, 4)] = KP951056516 * (FMA(KP910592997, TA, Tx));
Chris@82 152 Ci[WS(csi, 2)] = KP951056516 * (FNMS(KP910592997, TC, TB));
Chris@82 153 }
Chris@82 154 }
Chris@82 155 } /* Ci[0] is never written by this function. */
Chris@82 156 }
Chris@82 157 }
Chris@82 158
Chris@82 159 static const kr2c_desc desc = { 15, "r2cf_15", {36, 7, 28, 0}, &GENUS }; /* Descriptor passed at registration: 15 and the name match this codelet, and {36, 7, 28, ...} repeats the addition / multiplication / fused multiply-add counts from the header comment above. NOTE(review): meaning of the trailing 0 and of GENUS is defined outside this file. */
Chris@82 160
Chris@82 161 void X(codelet_r2cf_15) (planner *p) { /* Entry point for this file: hands the r2cf_15 function and its descriptor to the planner p. */
Chris@82 162 X(kr2c_register) (p, r2cf_15, &desc); /* X(...) is a name-wrapping macro defined outside this file -- presumably adds the library prefix, TODO confirm. */
Chris@82 163 }
Chris@82 164
Chris@82 165 #else
Chris@82 166
Chris@82 167 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cf_15 -include rdft/scalar/r2cf.h */
Chris@82 168
Chris@82 169 /*
Chris@82 170 * This function contains 64 FP additions, 25 FP multiplications,
Chris@82 171 * (or, 50 additions, 11 multiplications, 14 fused multiply/add),
Chris@82 172 * 47 stack variables, 10 constants, and 30 memory accesses
Chris@82 173 */
Chris@82 174 #include "rdft/scalar/r2cf.h"
Chris@82 175
Chris@82 176 static void r2cf_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* Variant compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined. Per iteration it reads 15 values (8 from R0, 7 from R1) and writes Cr at 8 positions and Ci at 7 positions. */
Chris@82 177 { /* NOTE(review): presumably R0/R1 hold the even/odd-indexed real inputs and Cr/Ci the real/imaginary outputs of a size-15 real-to-complex DFT, as the name and generator command suggest -- confirm against the codelet headers. */
Chris@82 178 DK(KP484122918, +0.484122918275927110647408174972799951354115213); /* numerically sqrt(15)/8 */
Chris@82 179 DK(KP216506350, +0.216506350946109661690930792688234045867850657); /* numerically sqrt(3)/8 */
Chris@82 180 DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@82 181 DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
Chris@82 182 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 183 DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@82 184 DK(KP509036960, +0.509036960455127183450980863393907648510733164);
Chris@82 185 DK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 186 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 187 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 188 {
Chris@82 189 INT i;
Chris@82 190 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) { /* Runs v times; after each pass the input pointers advance by ivs and the output pointers by ovs. */
Chris@82 191 E Ti, TR, TL, TD, TE, T7, Te, Tf, TV, TW, TX, Tv, Ty, TH, To; /* Intermediates that outlive the nested blocks below. */
Chris@82 192 E Tr, TG, TS, TT, TU;
Chris@82 193 { /* Inputs R0[0], R0[5] and R1[2]: TR is their sum; Ti and TL are derived from the same inputs. FMA/FNMS are macros defined outside this file -- presumably a*b+c and c-a*b, TODO confirm. */
Chris@82 194 E TJ, Tg, Th, TK;
Chris@82 195 TJ = R0[0];
Chris@82 196 Tg = R0[WS(rs, 5)];
Chris@82 197 Th = R1[WS(rs, 2)];
Chris@82 198 TK = Th + Tg;
Chris@82 199 Ti = Tg - Th;
Chris@82 200 TR = TJ + TK;
Chris@82 201 TL = FNMS(KP500000000, TK, TJ);
Chris@82 202 }
Chris@82 203 { /* Loads the other twelve inputs and forms the sums and differences shared by the output expressions. */
Chris@82 204 E Tm, Tt, Tw, Tp, T3, Tx, Ta, Tn, Td, Tq, T6, Tu;
Chris@82 205 Tm = R1[WS(rs, 1)];
Chris@82 206 Tt = R0[WS(rs, 3)];
Chris@82 207 Tw = R1[WS(rs, 4)];
Chris@82 208 Tp = R0[WS(rs, 6)];
Chris@82 209 {
Chris@82 210 E T1, T2, T8, T9;
Chris@82 211 T1 = R0[WS(rs, 7)];
Chris@82 212 T2 = R0[WS(rs, 2)];
Chris@82 213 T3 = T1 - T2;
Chris@82 214 Tx = T1 + T2;
Chris@82 215 T8 = R1[WS(rs, 6)];
Chris@82 216 T9 = R0[WS(rs, 4)];
Chris@82 217 Ta = T8 - T9;
Chris@82 218 Tn = T9 + T8;
Chris@82 219 }
Chris@82 220 {
Chris@82 221 E Tb, Tc, T4, T5;
Chris@82 222 Tb = R1[WS(rs, 3)];
Chris@82 223 Tc = R0[WS(rs, 1)];
Chris@82 224 Td = Tb - Tc;
Chris@82 225 Tq = Tc + Tb;
Chris@82 226 T4 = R1[0];
Chris@82 227 T5 = R1[WS(rs, 5)];
Chris@82 228 T6 = T4 - T5;
Chris@82 229 Tu = T5 + T4;
Chris@82 230 }
Chris@82 231 TD = Ta - Td;
Chris@82 232 TE = T6 + T3;
Chris@82 233 T7 = T3 - T6;
Chris@82 234 Te = Ta + Td;
Chris@82 235 Tf = T7 - Te;
Chris@82 236 TV = Tt + Tu;
Chris@82 237 TW = Tw + Tx;
Chris@82 238 TX = TV + TW;
Chris@82 239 Tv = FNMS(KP500000000, Tu, Tt);
Chris@82 240 Ty = FNMS(KP500000000, Tx, Tw);
Chris@82 241 TH = Tv + Ty;
Chris@82 242 To = FNMS(KP500000000, Tn, Tm);
Chris@82 243 Tr = FNMS(KP500000000, Tq, Tp);
Chris@82 244 TG = To + Tr;
Chris@82 245 TS = Tm + Tn;
Chris@82 246 TT = Tp + Tq;
Chris@82 247 TU = TS + TT;
Chris@82 248 }
Chris@82 249 Ci[WS(csi, 5)] = KP866025403 * (Tf - Ti);
Chris@82 250 { /* Writes Cr at positions 5, 2, 7, 1 and 4. */
Chris@82 251 E TF, TP, TI, TM, TN, TQ, TO;
Chris@82 252 TF = FMA(KP823639103, TD, KP509036960 * TE);
Chris@82 253 TP = FNMS(KP509036960, TD, KP823639103 * TE);
Chris@82 254 TI = KP559016994 * (TG - TH);
Chris@82 255 TM = TG + TH;
Chris@82 256 TN = FNMS(KP250000000, TM, TL);
Chris@82 257 Cr[WS(csr, 5)] = TL + TM;
Chris@82 258 TQ = TN - TI;
Chris@82 259 Cr[WS(csr, 2)] = TP + TQ;
Chris@82 260 Cr[WS(csr, 7)] = TQ - TP;
Chris@82 261 TO = TI + TN;
Chris@82 262 Cr[WS(csr, 1)] = TF + TO;
Chris@82 263 Cr[WS(csr, 4)] = TO - TF;
Chris@82 264 }
Chris@82 265 { /* Writes Ci at positions 3 and 6 and Cr at positions 3, 0 and 6; the nested block then writes Ci at positions 1, 7, 4 and 2. */
Chris@82 266 E T11, T12, T10, TY, TZ;
Chris@82 267 T11 = TS - TT;
Chris@82 268 T12 = TW - TV;
Chris@82 269 Ci[WS(csi, 3)] = FMA(KP587785252, T11, KP951056516 * T12);
Chris@82 270 Ci[WS(csi, 6)] = FNMS(KP951056516, T11, KP587785252 * T12);
Chris@82 271 T10 = KP559016994 * (TU - TX);
Chris@82 272 TY = TU + TX;
Chris@82 273 TZ = FNMS(KP250000000, TY, TR);
Chris@82 274 Cr[WS(csr, 3)] = TZ - T10;
Chris@82 275 Cr[0] = TR + TY; /* TR + TY expands to the plain sum of all 15 loaded inputs. */
Chris@82 276 Cr[WS(csr, 6)] = T10 + TZ;
Chris@82 277 {
Chris@82 278 E Tl, TB, TA, TC;
Chris@82 279 {
Chris@82 280 E Tj, Tk, Ts, Tz;
Chris@82 281 Tj = FMA(KP866025403, Ti, KP216506350 * Tf);
Chris@82 282 Tk = KP484122918 * (Te + T7);
Chris@82 283 Tl = Tj + Tk;
Chris@82 284 TB = Tk - Tj;
Chris@82 285 Ts = To - Tr;
Chris@82 286 Tz = Tv - Ty;
Chris@82 287 TA = FMA(KP951056516, Ts, KP587785252 * Tz);
Chris@82 288 TC = FNMS(KP587785252, Ts, KP951056516 * Tz);
Chris@82 289 }
Chris@82 290 Ci[WS(csi, 1)] = Tl - TA;
Chris@82 291 Ci[WS(csi, 7)] = TC - TB;
Chris@82 292 Ci[WS(csi, 4)] = Tl + TA;
Chris@82 293 Ci[WS(csi, 2)] = TB + TC;
Chris@82 294 }
Chris@82 295 }
Chris@82 296 } /* Ci[0] is never written by this function. */
Chris@82 297 }
Chris@82 298 }
Chris@82 299
Chris@82 300 static const kr2c_desc desc = { 15, "r2cf_15", {50, 11, 14, 0}, &GENUS }; /* Descriptor passed at registration: 15 and the name match this codelet, and {50, 11, 14, ...} repeats the addition / multiplication / fused multiply-add counts from the header comment above. NOTE(review): meaning of the trailing 0 and of GENUS is defined outside this file. */
Chris@82 301
Chris@82 302 void X(codelet_r2cf_15) (planner *p) { /* Entry point for this file: hands the r2cf_15 function and its descriptor to the planner p. */
Chris@82 303 X(kr2c_register) (p, r2cf_15, &desc); /* X(...) is a name-wrapping macro defined outside this file -- presumably adds the library prefix, TODO confirm. */
Chris@82 304 }
Chris@82 305
Chris@82 306 #endif