annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cfII_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:43 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cfII_15 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 72 FP additions, 41 FP multiplications,
Chris@82 32 * (or, 38 additions, 7 multiplications, 34 fused multiply/add),
Chris@82 33 * 42 stack variables, 12 constants, and 30 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cfII.h"
Chris@82 36
/*
 * r2cfII_15 (FMA variant): machine-generated kernel produced by genfft's
 * gen_r2cf with "-fma -n 15 -dft-II" (see the "Generated by" line above).
 *
 * Each loop iteration reads 15 input values -- R0 at offsets 0 and
 * WS(rs, 1..7), R1 at offsets 0 and WS(rs, 1..6) -- and stores 15 output
 * values -- Cr at offsets 0 and WS(csr, 1..7), Ci at offsets 0 and
 * WS(csi, 1..6).
 *
 * Parameters:
 *   R0, R1    input arrays, indexed through WS(rs, k)
 *   Cr, Ci    output arrays, indexed through WS(csr, k) and WS(csi, k)
 *   rs        stride argument used for all input indexing
 *   csr, csi  stride arguments used for Cr and Ci indexing respectively
 *   v         iteration count of the outer loop (nothing is done if v <= 0)
 *   ivs       added to R0 and R1 after every iteration
 *   ovs       added to Cr and Ci after every iteration
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined in project headers not visible here;
 * FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c / c - a*b -- confirm.
 * The statement order is generator output; do not rearrange by hand.
 */
Chris@82 37 static void r2cfII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/*
 * Numeric constants; each name encodes the leading digits of its value.
 * Numerically, KP823639103 equals KP866025403 * KP951056516 and
 * KP910592997 equals KP866025403 / KP951056516.
 */
Chris@82 39 DK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 40 DK(KP910592997, +0.910592997310029334643087372129977886038870291);
Chris@82 41 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 42 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 43 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 44 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 45 DK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@82 46 DK(KP447213595, +0.447213595499957939281834733746255247088123672);
Chris@82 47 DK(KP552786404, +0.552786404500042060718165266253744752911876328);
Chris@82 48 DK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@82 49 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 50 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 51 {
Chris@82 52 INT i;
/* One pass per transform: counts v down to zero, stepping the input pointers by ivs and the output pointers by ovs. */
Chris@82 53 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
/* Temporaries that stay live across the nested scopes below. */
Chris@82 54 E Ta, Tl, T1, T6, T7, TX, TT, T8, Tg, Th, TM, TZ, Tj, Tz, Tr;
Chris@82 55 E Ts, TP, TY, Tu, TC;
/* Two inputs loaded on their own; they are combined with the group results in the output stages. */
Chris@82 56 Ta = R0[WS(rs, 5)];
Chris@82 57 Tl = R1[WS(rs, 2)];
/* Input group 1: R0[0], R0[3], R0[6], R1[1], R1[4]. */
Chris@82 58 {
Chris@82 59 E T2, T5, T3, T4, TR, TS;
Chris@82 60 T1 = R0[0];
Chris@82 61 T2 = R0[WS(rs, 3)];
Chris@82 62 T5 = R1[WS(rs, 4)];
Chris@82 63 T3 = R0[WS(rs, 6)];
Chris@82 64 T4 = R1[WS(rs, 1)];
Chris@82 65 TR = T2 + T5;
Chris@82 66 TS = T3 + T4;
Chris@82 67 T6 = T2 + T3 - T4 - T5;
Chris@82 68 T7 = FNMS(KP250000000, T6, T1);
Chris@82 69 TX = FNMS(KP618033988, TR, TS);
Chris@82 70 TT = FMA(KP618033988, TS, TR);
Chris@82 71 T8 = (T3 + T5 - T2) - T4;
Chris@82 72 }
/* Input group 2: R1[0], R0[2], R1[3], R1[6]. */
Chris@82 73 {
Chris@82 74 E Tf, TL, TK, Ti, Ty;
Chris@82 75 {
Chris@82 76 E Tb, Tc, Td, Te;
Chris@82 77 Tb = R1[0];
Chris@82 78 Tg = R0[WS(rs, 2)];
Chris@82 79 Tc = R1[WS(rs, 3)];
Chris@82 80 Td = R1[WS(rs, 6)];
Chris@82 81 Te = Tc + Td;
Chris@82 82 Tf = Tb - Te;
Chris@82 83 TL = Tc - Td;
Chris@82 84 Th = Tb + Te;
Chris@82 85 TK = Tg + Tb;
Chris@82 86 }
Chris@82 87 TM = FMA(KP618033988, TL, TK);
Chris@82 88 TZ = FNMS(KP618033988, TK, TL);
Chris@82 89 Ti = FMA(KP809016994, Th, Tg);
Chris@82 90 Tj = FNMS(KP552786404, Ti, Tf);
Chris@82 91 Ty = FMA(KP447213595, Th, Tf);
Chris@82 92 Tz = FNMS(KP690983005, Ty, Tg);
Chris@82 93 }
/* Input group 3: R0[7], R1[5], R0[1], R0[4]; same operation pattern as group 2. */
Chris@82 94 {
Chris@82 95 E Tq, TO, TN, Tt, TB;
Chris@82 96 {
Chris@82 97 E Tm, Tn, To, Tp;
Chris@82 98 Tm = R0[WS(rs, 7)];
Chris@82 99 Tr = R1[WS(rs, 5)];
Chris@82 100 Tn = R0[WS(rs, 1)];
Chris@82 101 To = R0[WS(rs, 4)];
Chris@82 102 Tp = Tn + To;
Chris@82 103 Tq = Tm - Tp;
Chris@82 104 TO = To - Tn;
Chris@82 105 Ts = Tm + Tp;
Chris@82 106 TN = Tr + Tm;
Chris@82 107 }
Chris@82 108 TP = FMA(KP618033988, TO, TN);
Chris@82 109 TY = FNMS(KP618033988, TN, TO);
Chris@82 110 Tt = FMA(KP809016994, Ts, Tr);
Chris@82 111 Tu = FNMS(KP552786404, Tt, Tq);
Chris@82 112 TB = FMA(KP447213595, Ts, Tq);
Chris@82 113 TC = FNMS(KP690983005, TB, Tr);
Chris@82 114 }
/* Output stage A: stores Cr[2], Ci[2] and Cr[7]. */
Chris@82 115 {
Chris@82 116 E TF, TG, TH, TI;
Chris@82 117 TF = T1 + T6;
Chris@82 118 TG = Ts - Tr - Tl;
Chris@82 119 TH = Ta + Tg - Th;
Chris@82 120 TI = TG + TH;
Chris@82 121 Cr[WS(csr, 2)] = FNMS(KP500000000, TI, TF);
Chris@82 122 Ci[WS(csi, 2)] = KP866025403 * (TH - TG);
Chris@82 123 Cr[WS(csr, 7)] = TF + TI;
Chris@82 124 }
/* Output stage B: stores Cr[1], Cr[3], Cr[6] and Ci[1], Ci[3], Ci[6]. */
Chris@82 125 {
Chris@82 126 E Tx, T14, T10, T11, TE, T12, TA, TD, T13;
Chris@82 127 Tx = FMA(KP559016994, T8, T7);
Chris@82 128 T14 = TZ - TY;
Chris@82 129 T10 = TY + TZ;
Chris@82 130 T11 = FMA(KP500000000, T10, TX);
Chris@82 131 TA = FNMS(KP809016994, Tz, Ta);
Chris@82 132 TD = FNMS(KP809016994, TC, Tl);
Chris@82 133 TE = TA - TD;
Chris@82 134 T12 = TD + TA;
Chris@82 135 Cr[WS(csr, 1)] = Tx + TE;
Chris@82 136 Ci[WS(csi, 1)] = KP951056516 * (T10 - TX);
Chris@82 137 Ci[WS(csi, 3)] = KP951056516 * (FNMS(KP910592997, T12, T11));
Chris@82 138 Ci[WS(csi, 6)] = -(KP951056516 * (FMA(KP910592997, T12, T11)));
Chris@82 139 T13 = FNMS(KP500000000, TE, Tx);
Chris@82 140 Cr[WS(csr, 3)] = FNMS(KP823639103, T14, T13);
Chris@82 141 Cr[WS(csr, 6)] = FMA(KP823639103, T14, T13);
Chris@82 142 }
/* Output stage C: stores Cr[4], Cr[5], Cr[0] and Ci[4], Ci[0], Ci[5]; mirrors stage B with the other half of the intermediates. */
Chris@82 143 {
Chris@82 144 E T9, TQ, TU, TV, Tw, TW, Tk, Tv, TJ;
Chris@82 145 T9 = FNMS(KP559016994, T8, T7);
Chris@82 146 TQ = TM - TP;
Chris@82 147 TU = TP + TM;
Chris@82 148 TV = FMA(KP500000000, TU, TT);
Chris@82 149 Tk = FNMS(KP559016994, Tj, Ta);
Chris@82 150 Tv = FNMS(KP559016994, Tu, Tl);
Chris@82 151 Tw = Tk - Tv;
Chris@82 152 TW = Tv + Tk;
Chris@82 153 Cr[WS(csr, 4)] = T9 + Tw;
Chris@82 154 Ci[WS(csi, 4)] = KP951056516 * (TT - TU);
Chris@82 155 Ci[0] = -(KP951056516 * (FMA(KP910592997, TW, TV)));
Chris@82 156 Ci[WS(csi, 5)] = -(KP951056516 * (FNMS(KP910592997, TW, TV)));
Chris@82 157 TJ = FNMS(KP500000000, Tw, T9);
Chris@82 158 Cr[WS(csr, 5)] = FNMS(KP823639103, TQ, TJ);
Chris@82 159 Cr[0] = FMA(KP823639103, TQ, TJ);
Chris@82 160 }
Chris@82 161 }
Chris@82 162 }
Chris@82 163 }
Chris@82 164
/*
 * Descriptor for the FMA variant of the codelet: the value 15, the name
 * string "r2cfII_15", the count tuple {38, 7, 34, 0} and &GENUS.
 * The first three counts equal the additions / multiplications / fused
 * multiply-adds reported in the generator's summary comment for this variant.
 * NOTE(review): the field meanings of kr2c_desc and the definition of GENUS
 * live in project headers not visible here -- 15 is presumably the transform
 * size; confirm.
 */
Chris@82 165 static const kr2c_desc desc = { 15, "r2cfII_15", {38, 7, 34, 0}, &GENUS };
Chris@82 166
/*
 * Registration entry point (FMA variant): passes the planner `p`, the
 * r2cfII_15 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a project macro defined elsewhere, presumably a
 * name-prefixing wrapper -- confirm.
 */
Chris@82 167 void X(codelet_r2cfII_15) (planner *p) {
Chris@82 168 X(kr2c_register) (p, r2cfII_15, &desc);
Chris@82 169 }
Chris@82 170
Chris@82 171 #else
Chris@82 172
Chris@82 173 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 15 -name r2cfII_15 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 174
Chris@82 175 /*
Chris@82 176 * This function contains 72 FP additions, 33 FP multiplications,
Chris@82 177 * (or, 54 additions, 15 multiplications, 18 fused multiply/add),
Chris@82 178 * 37 stack variables, 8 constants, and 30 memory accesses
Chris@82 179 */
Chris@82 180 #include "rdft/scalar/r2cfII.h"
Chris@82 181
/*
 * r2cfII_15 (non-FMA variant): machine-generated kernel produced by genfft's
 * gen_r2cf with "-n 15 -dft-II" (see the "Generated by" line above); it is
 * compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is
 * defined.
 *
 * Each loop iteration reads 15 input values -- R0 at offsets 0 and
 * WS(rs, 1..7), R1 at offsets 0 and WS(rs, 1..6) -- and stores 15 output
 * values -- Cr at offsets 0 and WS(csr, 1..7), Ci at offsets 0 and
 * WS(csi, 1..6).
 *
 * Parameters:
 *   R0, R1    input arrays, indexed through WS(rs, k)
 *   Cr, Ci    output arrays, indexed through WS(csr, k) and WS(csi, k)
 *   rs        stride argument used for all input indexing
 *   csr, csi  stride arguments used for Cr and Ci indexing respectively
 *   v         iteration count of the outer loop (nothing is done if v <= 0)
 *   ivs       added to R0 and R1 after every iteration
 *   ovs       added to Cr and Ci after every iteration
 *
 * NOTE(review): E, R, INT, stride, DK, WS, FMA, FMS, FNMS, FNMA and
 * MAKE_VOLATILE_STRIDE are defined in project headers not visible here;
 * presumably FMA = a*b + c, FMS = a*b - c, FNMS = c - a*b and
 * FNMA = -(a*b + c) -- confirm.
 * The statement order is generator output; do not rearrange by hand.
 */
Chris@82 182 static void r2cfII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 183 {
/* Numeric constants; each name encodes the leading digits of its value. */
Chris@82 184 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 185 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 186 DK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@82 187 DK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@82 188 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 189 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 190 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 191 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 192 {
Chris@82 193 INT i;
/* One pass per transform: counts v down to zero, stepping the input pointers by ivs and the output pointers by ovs. */
Chris@82 194 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
/* Temporaries that stay live across the nested scopes below. */
Chris@82 195 E T1, T2, Tx, TR, TE, T7, TD, Th, Tm, Tr, TQ, TA, TB, Tf, Te;
Chris@82 196 E Tu, TS, Td, TH, TO;
/* Input loaded on its own; it is combined with the group results in the output stages. */
Chris@82 197 T1 = R0[WS(rs, 5)];
/* Input group: R0[2], R1[0], R1[3], R1[6]. */
Chris@82 198 {
Chris@82 199 E T3, Tv, T6, Tw, T4, T5;
Chris@82 200 T2 = R0[WS(rs, 2)];
Chris@82 201 T3 = R1[0];
Chris@82 202 Tv = T2 + T3;
Chris@82 203 T4 = R1[WS(rs, 3)];
Chris@82 204 T5 = R1[WS(rs, 6)];
Chris@82 205 T6 = T4 + T5;
Chris@82 206 Tw = T4 - T5;
Chris@82 207 Tx = FMA(KP951056516, Tv, KP587785252 * Tw);
Chris@82 208 TR = FNMS(KP587785252, Tv, KP951056516 * Tw);
Chris@82 209 TE = KP559016994 * (T3 - T6);
Chris@82 210 T7 = T3 + T6;
Chris@82 211 TD = KP250000000 * T7;
Chris@82 212 }
/* Input group: R0[0], R1[4], R0[6], R1[1], R0[3]. */
Chris@82 213 {
Chris@82 214 E Ti, Tl, Tj, Tk, Tp, Tq;
Chris@82 215 Th = R0[0];
Chris@82 216 Ti = R1[WS(rs, 4)];
Chris@82 217 Tl = R0[WS(rs, 6)];
Chris@82 218 Tj = R1[WS(rs, 1)];
Chris@82 219 Tk = R0[WS(rs, 3)];
Chris@82 220 Tp = Tk + Ti;
Chris@82 221 Tq = Tl + Tj;
Chris@82 222 Tm = Ti + Tj - (Tk + Tl);
Chris@82 223 Tr = FMA(KP951056516, Tp, KP587785252 * Tq);
Chris@82 224 TQ = FNMS(KP951056516, Tq, KP587785252 * Tp);
Chris@82 225 TA = FMA(KP250000000, Tm, Th);
Chris@82 226 TB = KP559016994 * (Tl + Ti - (Tk + Tj));
Chris@82 227 }
/* Input group: R1[2], R0[7], R1[5], R0[1], R0[4]. */
Chris@82 228 {
Chris@82 229 E T9, Tt, Tc, Ts, Ta, Tb, TG;
Chris@82 230 Tf = R1[WS(rs, 2)];
Chris@82 231 T9 = R0[WS(rs, 7)];
Chris@82 232 Te = R1[WS(rs, 5)];
Chris@82 233 Tt = T9 + Te;
Chris@82 234 Ta = R0[WS(rs, 1)];
Chris@82 235 Tb = R0[WS(rs, 4)];
Chris@82 236 Tc = Ta + Tb;
Chris@82 237 Ts = Ta - Tb;
Chris@82 238 Tu = FNMS(KP951056516, Tt, KP587785252 * Ts);
Chris@82 239 TS = FMA(KP951056516, Ts, KP587785252 * Tt);
Chris@82 240 Td = T9 + Tc;
Chris@82 241 TG = KP559016994 * (T9 - Tc);
Chris@82 242 TH = FNMS(KP309016994, Te, TG) + FNMA(KP250000000, Td, Tf);
Chris@82 243 TO = FMS(KP809016994, Te, Tf) + FNMA(KP250000000, Td, TG);
Chris@82 244 }
/* Output stage A: stores Ci[2], Cr[2] and Cr[7]. */
Chris@82 245 {
Chris@82 246 E Tn, T8, Tg, To;
Chris@82 247 Tn = Th - Tm;
Chris@82 248 T8 = T1 + T2 - T7;
Chris@82 249 Tg = Td - Te - Tf;
Chris@82 250 To = T8 + Tg;
Chris@82 251 Ci[WS(csi, 2)] = KP866025403 * (T8 - Tg);
Chris@82 252 Cr[WS(csr, 2)] = FNMS(KP500000000, To, Tn);
Chris@82 253 Cr[WS(csr, 7)] = Tn + To;
Chris@82 254 }
/* Output stage B: stores Cr[1], Cr[3], Cr[6] and Ci[1], Ci[3], Ci[6]. */
Chris@82 255 {
Chris@82 256 E TM, TX, TT, TV, TP, TU, TN, TW;
Chris@82 257 TM = TB + TA;
Chris@82 258 TX = KP866025403 * (TR + TS);
Chris@82 259 TT = TR - TS;
Chris@82 260 TV = FMS(KP500000000, TT, TQ);
Chris@82 261 TN = T1 + TE + FNMS(KP809016994, T2, TD);
Chris@82 262 TP = TN + TO;
Chris@82 263 TU = KP866025403 * (TO - TN);
Chris@82 264 Cr[WS(csr, 1)] = TM + TP;
Chris@82 265 Ci[WS(csi, 1)] = TQ + TT;
Chris@82 266 Ci[WS(csi, 6)] = TU - TV;
Chris@82 267 Ci[WS(csi, 3)] = TU + TV;
Chris@82 268 TW = FNMS(KP500000000, TP, TM);
Chris@82 269 Cr[WS(csr, 3)] = TW - TX;
Chris@82 270 Cr[WS(csr, 6)] = TW + TX;
Chris@82 271 }
/* Output stage C: stores Cr[4], Cr[0], Cr[5] and Ci[4], Ci[5], Ci[0]; mirrors stage B with the other half of the intermediates. */
Chris@82 272 {
Chris@82 273 E Tz, TC, Ty, TK, TI, TL, TF, TJ;
Chris@82 274 Tz = KP866025403 * (Tx + Tu);
Chris@82 275 TC = TA - TB;
Chris@82 276 Ty = Tu - Tx;
Chris@82 277 TK = FMS(KP500000000, Ty, Tr);
Chris@82 278 TF = FMA(KP309016994, T2, T1) + TD - TE;
Chris@82 279 TI = TF + TH;
Chris@82 280 TL = KP866025403 * (TH - TF);
Chris@82 281 Ci[WS(csi, 4)] = Tr + Ty;
Chris@82 282 Cr[WS(csr, 4)] = TC + TI;
Chris@82 283 Ci[WS(csi, 5)] = TK - TL;
Chris@82 284 Ci[0] = TK + TL;
Chris@82 285 TJ = FNMS(KP500000000, TI, TC);
Chris@82 286 Cr[0] = Tz + TJ;
Chris@82 287 Cr[WS(csr, 5)] = TJ - Tz;
Chris@82 288 }
Chris@82 289 }
Chris@82 290 }
Chris@82 291 }
Chris@82 292
/*
 * Descriptor for the non-FMA variant of the codelet: the value 15, the name
 * string "r2cfII_15", the count tuple {54, 15, 18, 0} and &GENUS.
 * The first three counts equal the additions / multiplications / fused
 * multiply-adds reported in the generator's summary comment for this variant.
 * NOTE(review): the field meanings of kr2c_desc and the definition of GENUS
 * live in project headers not visible here -- 15 is presumably the transform
 * size; confirm.
 */
Chris@82 293 static const kr2c_desc desc = { 15, "r2cfII_15", {54, 15, 18, 0}, &GENUS };
Chris@82 294
/*
 * Registration entry point (non-FMA variant): passes the planner `p`, the
 * r2cfII_15 kernel and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a project macro defined elsewhere, presumably a
 * name-prefixing wrapper -- confirm.
 */
Chris@82 295 void X(codelet_r2cfII_15) (planner *p) {
Chris@82 296 X(kr2c_register) (p, r2cfII_15, &desc);
Chris@82 297 }
Chris@82 298
Chris@82 299 #endif