annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cb_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:51 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cb_6 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@82 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@82 33 * 31 stack variables, 2 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cb_6 (FMA variant): generated size-6 hc2c kernel (genfft options
 * "-n 6 -sign 1 -dif", see the generator line preceding this function).
 *
 * For each m in [mb, me) it loads three values from each of Rp, Ip, Rm
 * and Im (offsets 0, WS(rs, 1), WS(rs, 2)), combines them, and stores
 * twelve results back into the same four arrays.  Every load of an
 * iteration is issued before that iteration's first store.
 *
 * Rp, Ip : advanced by +ms after each iteration.
 * Rm, Im : advanced by -ms after each iteration.
 * W      : twiddle table; W[0..9] are read per iteration and the pointer
 *          advances by 10.  It is first moved to W + (mb - 1) * 10.
 * rs     : stride handed to WS() to form the element offsets.
 * mb, me : half-open iteration range.
 * ms     : per-iteration pointer step.
 *
 * NOTE(review): the comments below read FMA(a, b, c) as a*b + c and
 * FNMS(a, b, c) as c - a*b; those macros are defined outside this file --
 * confirm against the project headers.
 */
Chris@82 37 static void hc2cb_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* KP866025403 is numerically sqrt(3)/2; KP500000000 is 1/2. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
/* MAKE_VOLATILE_STRIDE is a project macro evaluated in the loop increment; its effect is not visible from this file. */
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) {
Chris@82 44 E Td, Tn, TO, TJ, TN, Tk, Tr, T3, TC, Ts, TQ, Ta, Tm, TF, TG;
/* Load the six Ip/Im inputs and form their pairwise sums and differences. */
Chris@82 45 {
Chris@82 46 E Tb, Tc, Tj, TI, Tg, TH;
Chris@82 47 Tb = Ip[0];
Chris@82 48 Tc = Im[WS(rs, 2)];
Chris@82 49 Td = Tb - Tc;
Chris@82 50 {
Chris@82 51 E Th, Ti, Te, Tf;
Chris@82 52 Th = Ip[WS(rs, 1)];
Chris@82 53 Ti = Im[WS(rs, 1)];
Chris@82 54 Tj = Th - Ti;
Chris@82 55 TI = Th + Ti;
Chris@82 56 Te = Ip[WS(rs, 2)];
Chris@82 57 Tf = Im[0];
Chris@82 58 Tg = Te - Tf;
Chris@82 59 TH = Te + Tf;
Chris@82 60 }
Chris@82 61 Tn = Tj - Tg;
Chris@82 62 TO = TH - TI;
Chris@82 63 TJ = TH + TI;
Chris@82 64 TN = Tb + Tc;
Chris@82 65 Tk = Tg + Tj;
/* Tr = Td - Tk/2 (under the FNMS reading noted in the header). */
Chris@82 66 Tr = FNMS(KP500000000, Tk, Td);
Chris@82 67 }
/* Load the six Rp/Rm inputs and form their pairwise sums and differences. */
Chris@82 68 {
Chris@82 69 E T9, TE, T6, TD, T1, T2;
Chris@82 70 T1 = Rp[0];
Chris@82 71 T2 = Rm[WS(rs, 2)];
Chris@82 72 T3 = T1 + T2;
Chris@82 73 TC = T1 - T2;
Chris@82 74 {
Chris@82 75 E T7, T8, T4, T5;
Chris@82 76 T7 = Rm[WS(rs, 1)];
Chris@82 77 T8 = Rp[WS(rs, 1)];
Chris@82 78 T9 = T7 + T8;
Chris@82 79 TE = T7 - T8;
Chris@82 80 T4 = Rp[WS(rs, 2)];
Chris@82 81 T5 = Rm[0];
Chris@82 82 T6 = T4 + T5;
Chris@82 83 TD = T4 - T5;
Chris@82 84 }
Chris@82 85 Ts = T6 - T9;
Chris@82 86 TQ = TD - TE;
Chris@82 87 Ta = T6 + T9;
Chris@82 88 Tm = FNMS(KP500000000, Ta, T3);
Chris@82 89 TF = TD + TE;
Chris@82 90 TG = FNMS(KP500000000, TF, TC);
Chris@82 91 }
/* First output pair: plain sums, no twiddle factor applied. */
Chris@82 92 Rp[0] = T3 + Ta;
Chris@82 93 Rm[0] = Td + Tk;
/* (To, Tt) times (W[2], W[3]) as a complex product -> Rp[1], Rm[1]. */
Chris@82 94 {
Chris@82 95 E To, Tt, Tp, Tu, Tl, Tq;
Chris@82 96 To = FNMS(KP866025403, Tn, Tm);
Chris@82 97 Tt = FNMS(KP866025403, Ts, Tr);
Chris@82 98 Tl = W[2];
Chris@82 99 Tp = Tl * To;
Chris@82 100 Tu = Tl * Tt;
Chris@82 101 Tq = W[3];
Chris@82 102 Rp[WS(rs, 1)] = FNMS(Tq, Tt, Tp);
Chris@82 103 Rm[WS(rs, 1)] = FMA(Tq, To, Tu);
Chris@82 104 }
/* (T10, T13) times (W[4], W[5]) as a complex product -> Ip[1], Im[1]. */
Chris@82 105 {
Chris@82 106 E T13, TZ, T11, T12, T14, T10;
Chris@82 107 T13 = TN + TO;
Chris@82 108 T10 = TC + TF;
Chris@82 109 TZ = W[4];
Chris@82 110 T11 = TZ * T10;
Chris@82 111 T12 = W[5];
Chris@82 112 T14 = T12 * T10;
Chris@82 113 Ip[WS(rs, 1)] = FNMS(T12, T13, T11);
Chris@82 114 Im[WS(rs, 1)] = FMA(TZ, T13, T14);
Chris@82 115 }
/* (Tw, Tz) times (W[6], W[7]) as a complex product -> Rp[2], Rm[2]. */
Chris@82 116 {
Chris@82 117 E Tw, Tz, Tx, TA, Tv, Ty;
Chris@82 118 Tw = FMA(KP866025403, Tn, Tm);
Chris@82 119 Tz = FMA(KP866025403, Ts, Tr);
Chris@82 120 Tv = W[6];
Chris@82 121 Tx = Tv * Tw;
Chris@82 122 TA = Tv * Tz;
Chris@82 123 Ty = W[7];
Chris@82 124 Rp[WS(rs, 2)] = FNMS(Ty, Tz, Tx);
Chris@82 125 Rm[WS(rs, 2)] = FMA(Ty, Tw, TA);
Chris@82 126 }
/*
 * (TK, TR) times (W[0], W[1]) -> Ip[0], Im[0], and
 * (TU, TX) times (W[8], W[9]) -> Ip[2], Im[2]; both as complex products.
 */
Chris@82 127 {
Chris@82 128 E TR, TX, TT, TV, TW, TY, TB, TL, TM, TS, TP, TU, TK;
Chris@82 129 TP = FNMS(KP500000000, TO, TN);
Chris@82 130 TR = FMA(KP866025403, TQ, TP);
Chris@82 131 TX = FNMS(KP866025403, TQ, TP);
Chris@82 132 TU = FMA(KP866025403, TJ, TG);
Chris@82 133 TT = W[8];
Chris@82 134 TV = TT * TU;
Chris@82 135 TW = W[9];
Chris@82 136 TY = TW * TU;
Chris@82 137 TK = FNMS(KP866025403, TJ, TG);
Chris@82 138 TB = W[0];
Chris@82 139 TL = TB * TK;
Chris@82 140 TM = W[1];
Chris@82 141 TS = TM * TK;
Chris@82 142 Ip[0] = FNMS(TM, TR, TL);
Chris@82 143 Im[0] = FMA(TB, TR, TS);
Chris@82 144 Ip[WS(rs, 2)] = FNMS(TW, TX, TV);
Chris@82 145 Im[WS(rs, 2)] = FMA(TT, TX, TY);
Chris@82 146 }
Chris@82 147 }
Chris@82 148 }
Chris@82 149 }
Chris@82 150
/*
 * Twiddle instruction list passed to the planner through desc.
 * NOTE(review): TW_FULL and TW_NEXT are defined outside this file.
 * Presumably the first entry requests the full twiddle set for size 6
 * (the kernel reads 10 values of W per iteration) and the second entry
 * terminates the list -- confirm against the tw_instr definition.
 */
Chris@82 151 static const tw_instr twinstr[] = {
Chris@82 152 {TW_FULL, 1, 6},
Chris@82 153 {TW_NEXT, 1, 0}
Chris@82 154 };
Chris@82 155
/*
 * Codelet descriptor: size 6, name "hc2cb_6", the twiddle instruction
 * list, the GENUS object from the included hc2cb.h, and an operation
 * count.  The values {24, 10, 22, 0} match the "24 additions,
 * 10 multiplications, 22 fused multiply/add" figures in the generated
 * header comment of this FMA variant; the meaning of the fourth value is
 * not visible here.
 */
Chris@82 156 static const hc2c_desc desc = { 6, "hc2cb_6", twinstr, &GENUS, {24, 10, 22, 0} };
Chris@82 157
/*
 * Registration entry point for the FMA variant: hands the hc2cb_6 kernel
 * and its descriptor to the planner p via X(khc2c_register), tagged
 * HC2C_VIA_RDFT.
 */
Chris@82 158 void X(codelet_hc2cb_6) (planner *p) {
Chris@82 159 X(khc2c_register) (p, hc2cb_6, &desc, HC2C_VIA_RDFT);
Chris@82 160 }
Chris@82 161 #else
Chris@82 162
Chris@82 163 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cb_6 -include rdft/scalar/hc2cb.h */
Chris@82 164
Chris@82 165 /*
Chris@82 166 * This function contains 46 FP additions, 28 FP multiplications,
Chris@82 167 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@82 168 * 25 stack variables, 2 constants, and 24 memory accesses
Chris@82 169 */
Chris@82 170 #include "rdft/scalar/hc2cb.h"
Chris@82 171
/*
 * hc2cb_6 (non-FMA variant): generated size-6 hc2c kernel (genfft options
 * "-n 6 -sign 1 -dif", see the generator line preceding this function).
 * Compiled only when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) it loads three values from each of Rp, Ip, Rm
 * and Im (offsets 0, WS(rs, 1), WS(rs, 2)), combines them, and stores
 * twelve results back into the same four arrays.  Every load of an
 * iteration is issued before that iteration's first store.
 *
 * Rp, Ip : advanced by +ms after each iteration.
 * Rm, Im : advanced by -ms after each iteration.
 * W      : twiddle table; W[0..9] are read per iteration and the pointer
 *          advances by 10.  It is first moved to W + (mb - 1) * 10.
 * rs     : stride handed to WS() to form the element offsets.
 * mb, me : half-open iteration range.
 * ms     : per-iteration pointer step.
 *
 * NOTE(review): the comments below read FMA(a, b, c) as a*b + c and
 * FNMS(a, b, c) as c - a*b; those macros are defined outside this file --
 * confirm against the project headers.
 */
Chris@82 172 static void hc2cb_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 173 {
/* KP500000000 is 1/2; KP866025403 is numerically sqrt(3)/2. */
Chris@82 174 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 175 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 176 {
Chris@82 177 INT m;
/* MAKE_VOLATILE_STRIDE is a project macro evaluated in the loop increment; its effect is not visible from this file. */
Chris@82 178 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) {
Chris@82 179 E T3, Ty, Td, TE, Ta, TO, Tr, TB, Tk, TL, Tn, TH;
/* Sums and differences of Rp[0]/Rm[2] and of Ip[0]/Im[2]. */
Chris@82 180 {
Chris@82 181 E T1, T2, Tb, Tc;
Chris@82 182 T1 = Rp[0];
Chris@82 183 T2 = Rm[WS(rs, 2)];
Chris@82 184 T3 = T1 + T2;
Chris@82 185 Ty = T1 - T2;
Chris@82 186 Tb = Ip[0];
Chris@82 187 Tc = Im[WS(rs, 2)];
Chris@82 188 Td = Tb - Tc;
Chris@82 189 TE = Tb + Tc;
Chris@82 190 }
/* Remaining Rp/Rm inputs; TO and Tr are pre-scaled by sqrt(3)/2. */
Chris@82 191 {
Chris@82 192 E T6, Tz, T9, TA;
Chris@82 193 {
Chris@82 194 E T4, T5, T7, T8;
Chris@82 195 T4 = Rp[WS(rs, 2)];
Chris@82 196 T5 = Rm[0];
Chris@82 197 T6 = T4 + T5;
Chris@82 198 Tz = T4 - T5;
Chris@82 199 T7 = Rm[WS(rs, 1)];
Chris@82 200 T8 = Rp[WS(rs, 1)];
Chris@82 201 T9 = T7 + T8;
Chris@82 202 TA = T7 - T8;
Chris@82 203 }
Chris@82 204 Ta = T6 + T9;
Chris@82 205 TO = KP866025403 * (Tz - TA);
Chris@82 206 Tr = KP866025403 * (T6 - T9);
Chris@82 207 TB = Tz + TA;
Chris@82 208 }
/* Remaining Ip/Im inputs; TL and Tn are pre-scaled by sqrt(3)/2. */
Chris@82 209 {
Chris@82 210 E Tg, TG, Tj, TF;
Chris@82 211 {
Chris@82 212 E Te, Tf, Th, Ti;
Chris@82 213 Te = Ip[WS(rs, 2)];
Chris@82 214 Tf = Im[0];
Chris@82 215 Tg = Te - Tf;
Chris@82 216 TG = Te + Tf;
Chris@82 217 Th = Ip[WS(rs, 1)];
Chris@82 218 Ti = Im[WS(rs, 1)];
Chris@82 219 Tj = Th - Ti;
Chris@82 220 TF = Th + Ti;
Chris@82 221 }
Chris@82 222 Tk = Tg + Tj;
Chris@82 223 TL = KP866025403 * (TG + TF);
Chris@82 224 Tn = KP866025403 * (Tj - Tg);
Chris@82 225 TH = TF - TG;
Chris@82 226 }
/* First output pair: plain sums, no twiddle factor applied. */
Chris@82 227 Rp[0] = T3 + Ta;
Chris@82 228 Rm[0] = Td + Tk;
/* (TC, TI) times (W[4], W[5]) as a complex product -> Ip[1], Im[1]. */
Chris@82 229 {
Chris@82 230 E TC, TI, Tx, TD;
Chris@82 231 TC = Ty + TB;
Chris@82 232 TI = TE - TH;
Chris@82 233 Tx = W[4];
Chris@82 234 TD = W[5];
Chris@82 235 Ip[WS(rs, 1)] = FNMS(TD, TI, Tx * TC);
Chris@82 236 Im[WS(rs, 1)] = FMA(TD, TC, Tx * TI);
Chris@82 237 }
/*
 * (To, Ts) times (W[2], W[3]) -> Rp[1], Rm[1], and
 * (Tu, Tw) times (W[6], W[7]) -> Rp[2], Rm[2]; both as complex products.
 */
Chris@82 238 {
Chris@82 239 E To, Tu, Ts, Tw, Tm, Tq;
Chris@82 240 Tm = FNMS(KP500000000, Ta, T3);
Chris@82 241 To = Tm - Tn;
Chris@82 242 Tu = Tm + Tn;
Chris@82 243 Tq = FNMS(KP500000000, Tk, Td);
Chris@82 244 Ts = Tq - Tr;
Chris@82 245 Tw = Tr + Tq;
Chris@82 246 {
Chris@82 247 E Tl, Tp, Tt, Tv;
Chris@82 248 Tl = W[2];
Chris@82 249 Tp = W[3];
Chris@82 250 Rp[WS(rs, 1)] = FNMS(Tp, Ts, Tl * To);
Chris@82 251 Rm[WS(rs, 1)] = FMA(Tl, Ts, Tp * To);
Chris@82 252 Tt = W[6];
Chris@82 253 Tv = W[7];
Chris@82 254 Rp[WS(rs, 2)] = FNMS(Tv, Tw, Tt * Tu);
Chris@82 255 Rm[WS(rs, 2)] = FMA(Tt, Tw, Tv * Tu);
Chris@82 256 }
Chris@82 257 }
/*
 * (TM, TQ) times (W[0], W[1]) -> Ip[0], Im[0], and
 * (TS, TU) times (W[8], W[9]) -> Ip[2], Im[2]; both as complex products.
 */
Chris@82 258 {
Chris@82 259 E TM, TS, TQ, TU, TK, TP;
Chris@82 260 TK = FNMS(KP500000000, TB, Ty);
Chris@82 261 TM = TK - TL;
Chris@82 262 TS = TK + TL;
Chris@82 263 TP = FMA(KP500000000, TH, TE);
Chris@82 264 TQ = TO + TP;
Chris@82 265 TU = TP - TO;
Chris@82 266 {
Chris@82 267 E TJ, TN, TR, TT;
Chris@82 268 TJ = W[0];
Chris@82 269 TN = W[1];
Chris@82 270 Ip[0] = FNMS(TN, TQ, TJ * TM);
Chris@82 271 Im[0] = FMA(TN, TM, TJ * TQ);
Chris@82 272 TR = W[8];
Chris@82 273 TT = W[9];
Chris@82 274 Ip[WS(rs, 2)] = FNMS(TT, TU, TR * TS);
Chris@82 275 Im[WS(rs, 2)] = FMA(TT, TS, TR * TU);
Chris@82 276 }
Chris@82 277 }
Chris@82 278 }
Chris@82 279 }
Chris@82 280 }
Chris@82 281
/*
 * Twiddle instruction list passed to the planner through desc.
 * NOTE(review): TW_FULL and TW_NEXT are defined outside this file.
 * Presumably the first entry requests the full twiddle set for size 6
 * (the kernel reads 10 values of W per iteration) and the second entry
 * terminates the list -- confirm against the tw_instr definition.
 */
Chris@82 282 static const tw_instr twinstr[] = {
Chris@82 283 {TW_FULL, 1, 6},
Chris@82 284 {TW_NEXT, 1, 0}
Chris@82 285 };
Chris@82 286
/*
 * Codelet descriptor: size 6, name "hc2cb_6", the twiddle instruction
 * list, the GENUS object from the included hc2cb.h, and an operation
 * count.  The values {32, 14, 14, 0} match the "32 additions,
 * 14 multiplications, 14 fused multiply/add" figures in the generated
 * header comment of this non-FMA variant; the meaning of the fourth
 * value is not visible here.
 */
Chris@82 287 static const hc2c_desc desc = { 6, "hc2cb_6", twinstr, &GENUS, {32, 14, 14, 0} };
Chris@82 288
/*
 * Registration entry point for the non-FMA variant: hands the hc2cb_6
 * kernel and its descriptor to the planner p via X(khc2c_register),
 * tagged HC2C_VIA_RDFT.
 */
Chris@82 289 void X(codelet_hc2cb_6) (planner *p) {
Chris@82 290 X(khc2c_register) (p, hc2cb_6, &desc, HC2C_VIA_RDFT);
Chris@82 291 }
Chris@82 292 #endif