annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cf_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes).
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:55 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hc2cf_6 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@82 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@82 33 * 31 stack variables, 2 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cf_6 (variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For each m in [mb, me) the loop reads twelve values -- elements 0,
 * WS(rs, 1) and WS(rs, 2) of each of Rp, Ip, Rm and Im -- together with ten
 * consecutive entries W[0..9], combines them, and stores twelve results back
 * into the same twelve locations.
 *
 * Parameters:
 *   Rp, Ip  arrays that are advanced by +ms after every iteration.
 *   Rm, Im  arrays that are advanced by -ms after every iteration.
 *   W       table of multipliers; offset by (mb - 1) * 10 before the first
 *           iteration and advanced by 10 after every iteration.
 *   rs      stride object, only used through WS(rs, k).
 *   mb, me  half-open iteration range.
 *   ms      element step applied to the four data pointers per iteration.
 *
 * NOTE(review): FMA(a, b, c), FNMS(a, b, c) and FMS(a, b, c) are macros from
 * the FFTW headers (not visible here); the comments below assume they mean
 * a*b + c, c - a*b and a*b - c respectively -- confirm against the header.
 */
Chris@82 37 static void hc2cf_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* The two constants used by the output butterflies: 0.866025... is numerically sqrt(3)/2, the other is 1/2. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
/* NOTE(review): MAKE_VOLATILE_STRIDE is an opaque macro evaluated in the loop step; its purpose is not visible here. */
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) {
/* Values that survive past the three input sections and feed the output sections. */
Chris@82 44 E T1, TX, T7, TW, Tl, TS, TB, TJ, Ty, TR, TC, TO;
/* The pair (Rp[0], Rm[0]) is used as loaded; no W entry is applied to it. */
Chris@82 45 T1 = Rp[0];
Chris@82 46 TX = Rm[0];
Chris@82 47 {
/* Pair (Ip[WS(rs,1)], Im[WS(rs,1)]) combined with (W[4], W[5]):
   T7 = W[4]*Ip + W[5]*Im and TW = W[4]*Im - W[5]*Ip (given the assumed FMA/FNMS meaning). */
Chris@82 48 E T3, T6, T4, TV, T2, T5;
Chris@82 49 T3 = Ip[WS(rs, 1)];
Chris@82 50 T6 = Im[WS(rs, 1)];
Chris@82 51 T2 = W[4];
Chris@82 52 T4 = T2 * T3;
Chris@82 53 TV = T2 * T6;
Chris@82 54 T5 = W[5];
Chris@82 55 T7 = FMA(T5, T6, T4);
Chris@82 56 TW = FNMS(T5, T3, TV);
Chris@82 57 }
Chris@82 58 {
/* Pair (Rp[WS(rs,1)], Rm[WS(rs,1)]) combined with (W[2], W[3]) -> (Te, TG), and
   pair (Ip[WS(rs,2)], Im[WS(rs,2)]) combined with (W[8], W[9]) -> (Tk, TI),
   using the same two-product pattern as above. */
Chris@82 59 E Ta, Td, Tb, TF, Tg, Tj, Th, TH, T9, Tf;
Chris@82 60 Ta = Rp[WS(rs, 1)];
Chris@82 61 Td = Rm[WS(rs, 1)];
Chris@82 62 T9 = W[2];
Chris@82 63 Tb = T9 * Ta;
Chris@82 64 TF = T9 * Td;
Chris@82 65 Tg = Ip[WS(rs, 2)];
Chris@82 66 Tj = Im[WS(rs, 2)];
Chris@82 67 Tf = W[8];
Chris@82 68 Th = Tf * Tg;
Chris@82 69 TH = Tf * Tj;
Chris@82 70 {
Chris@82 71 E Te, TG, Tk, TI, Tc, Ti;
Chris@82 72 Tc = W[3];
Chris@82 73 Te = FMA(Tc, Td, Tb);
Chris@82 74 TG = FNMS(Tc, Ta, TF);
Chris@82 75 Ti = W[9];
Chris@82 76 Tk = FMA(Ti, Tj, Th);
Chris@82 77 TI = FNMS(Ti, Tg, TH);
/* Sums and differences of the two products just formed; note TS is TI - TG (second minus first). */
Chris@82 78 Tl = Te - Tk;
Chris@82 79 TS = TI - TG;
Chris@82 80 TB = Te + Tk;
Chris@82 81 TJ = TG + TI;
Chris@82 82 }
Chris@82 83 }
Chris@82 84 {
/* Pair (Rp[WS(rs,2)], Rm[WS(rs,2)]) combined with (W[6], W[7]) -> (Tr, TL), and
   pair (Ip[0], Im[0]) combined with (W[0], W[1]) -> (Tx, TN). */
Chris@82 85 E Tn, Tq, To, TK, Tt, Tw, Tu, TM, Tm, Ts;
Chris@82 86 Tn = Rp[WS(rs, 2)];
Chris@82 87 Tq = Rm[WS(rs, 2)];
Chris@82 88 Tm = W[6];
Chris@82 89 To = Tm * Tn;
Chris@82 90 TK = Tm * Tq;
Chris@82 91 Tt = Ip[0];
Chris@82 92 Tw = Im[0];
Chris@82 93 Ts = W[0];
Chris@82 94 Tu = Ts * Tt;
Chris@82 95 TM = Ts * Tw;
Chris@82 96 {
Chris@82 97 E Tr, TL, Tx, TN, Tp, Tv;
Chris@82 98 Tp = W[7];
Chris@82 99 Tr = FMA(Tp, Tq, To);
Chris@82 100 TL = FNMS(Tp, Tn, TK);
Chris@82 101 Tv = W[1];
Chris@82 102 Tx = FMA(Tv, Tw, Tu);
Chris@82 103 TN = FNMS(Tv, Tt, TM);
/* Sums and differences of this section's two products. */
Chris@82 104 Ty = Tr - Tx;
Chris@82 105 TR = TN - TL;
Chris@82 106 TC = Tr + Tx;
Chris@82 107 TO = TL + TN;
Chris@82 108 }
Chris@82 109 }
/* All twelve inputs have been loaded into locals by this point; the four
   sections below each write three outputs back into Rp/Ip/Rm/Im. */
Chris@82 110 {
/* Outputs built from the "difference" terms T1 - T7, Tl, Ty, TR, TS:
   writes Rm[WS(rs,2)], Rp[WS(rs,1)] and Rm[0]. */
Chris@82 111 E TT, T8, Tz, TQ;
Chris@82 112 TT = TR - TS;
Chris@82 113 T8 = T1 - T7;
Chris@82 114 Tz = Tl + Ty;
Chris@82 115 TQ = FNMS(KP500000000, Tz, T8);
Chris@82 116 Rm[WS(rs, 2)] = T8 + Tz;
Chris@82 117 Rp[WS(rs, 1)] = FMA(KP866025403, TT, TQ);
Chris@82 118 Rm[0] = FNMS(KP866025403, TT, TQ);
Chris@82 119 }
Chris@82 120 {
/* Outputs built from TX - TW, TS + TR and Ty - Tl:
   writes Im[WS(rs,2)], Ip[WS(rs,1)] and Im[0]. */
Chris@82 121 E T14, T11, T12, T13;
Chris@82 122 T14 = Ty - Tl;
Chris@82 123 T11 = TS + TR;
Chris@82 124 T12 = TX - TW;
Chris@82 125 T13 = FMA(KP500000000, T11, T12);
Chris@82 126 Im[WS(rs, 2)] = T11 - T12;
Chris@82 127 Ip[WS(rs, 1)] = FMA(KP866025403, T14, T13);
Chris@82 128 Im[0] = FMS(KP866025403, T14, T13);
Chris@82 129 }
Chris@82 130 {
/* Outputs built from the "sum" terms T1 + T7, TB, TC and TJ - TO:
   writes Rp[0], Rm[WS(rs,1)] and Rp[WS(rs,2)]. */
Chris@82 131 E TP, TA, TD, TE;
Chris@82 132 TP = TJ - TO;
Chris@82 133 TA = T1 + T7;
Chris@82 134 TD = TB + TC;
Chris@82 135 TE = FNMS(KP500000000, TD, TA);
Chris@82 136 Rp[0] = TA + TD;
Chris@82 137 Rm[WS(rs, 1)] = FMA(KP866025403, TP, TE);
Chris@82 138 Rp[WS(rs, 2)] = FNMS(KP866025403, TP, TE);
Chris@82 139 }
Chris@82 140 {
/* Outputs built from TW + TX, TJ + TO and TB - TC:
   writes Ip[0], Ip[WS(rs,2)] and Im[WS(rs,1)]. */
Chris@82 141 E T10, TU, TY, TZ;
Chris@82 142 T10 = TB - TC;
Chris@82 143 TU = TJ + TO;
Chris@82 144 TY = TW + TX;
Chris@82 145 TZ = FNMS(KP500000000, TU, TY);
Chris@82 146 Ip[0] = TU + TY;
Chris@82 147 Ip[WS(rs, 2)] = FMA(KP866025403, T10, TZ);
Chris@82 148 Im[WS(rs, 1)] = FMS(KP866025403, T10, TZ);
Chris@82 149 }
Chris@82 150 }
Chris@82 151 }
Chris@82 152 }
Chris@82 153
/* Two-entry tw_instr table that the hc2c_desc initializer in this branch points at.
   NOTE(review): presumably {TW_FULL, 1, 6} requests the complete multiplier set for
   size 6 and {TW_NEXT, 1, 0} ends the list -- confirm against the tw_instr definition,
   which is not visible in this file. */
Chris@82 154 static const tw_instr twinstr[] = {
Chris@82 155 {TW_FULL, 1, 6},
Chris@82 156 {TW_NEXT, 1, 0}
Chris@82 157 };
Chris@82 158
/* Descriptor passed to the registration call: the value 6, the name string "hc2cf_6",
   the twinstr table, &GENUS, and the counts {24, 10, 22, 0}. The first three counts equal
   the "24 additions, 10 multiplications, 22 fused multiply/add" quoted in this variant's
   header comment. NOTE(review): the leading 6 is presumably the transform size and the
   meaning of the trailing 0 is not visible here -- confirm against hc2c_desc. */
Chris@82 159 static const hc2c_desc desc = { 6, "hc2cf_6", twinstr, &GENUS, {24, 10, 22, 0} };
Chris@82 160
/* Externally visible entry point of this file: hands the static hc2cf_6 kernel and its
   descriptor to X(khc2c_register) on planner p, tagged with HC2C_VIA_RDFT. */
Chris@82 161 void X(codelet_hc2cf_6) (planner *p) {
Chris@82 162 X(khc2c_register) (p, hc2cf_6, &desc, HC2C_VIA_RDFT);
Chris@82 163 }
Chris@82 164 #else
Chris@82 165
Chris@82 166 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hc2cf_6 -include rdft/scalar/hc2cf.h */
Chris@82 167
Chris@82 168 /*
Chris@82 169 * This function contains 46 FP additions, 28 FP multiplications,
Chris@82 170 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@82 171 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@82 172 */
Chris@82 173 #include "rdft/scalar/hc2cf.h"
Chris@82 174
/*
 * hc2cf_6 (variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For each m in [mb, me) the loop reads twelve values -- elements 0,
 * WS(rs, 1) and WS(rs, 2) of each of Rp, Ip, Rm and Im -- together with ten
 * consecutive entries W[0..9], combines them, and stores twelve results back
 * into the same twelve locations.
 *
 * Parameters:
 *   Rp, Ip  arrays that are advanced by +ms after every iteration.
 *   Rm, Im  arrays that are advanced by -ms after every iteration.
 *   W       table of multipliers; offset by (mb - 1) * 10 before the first
 *           iteration and advanced by 10 after every iteration.
 *   rs      stride object, only used through WS(rs, k).
 *   mb, me  half-open iteration range.
 *   ms      element step applied to the four data pointers per iteration.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are macros from the FFTW
 * headers (not visible here); the comments below assume they mean a*b + c
 * and c - a*b respectively -- confirm against the header.
 */
Chris@82 175 static void hc2cf_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 176 {
/* The two constants used by the output stage: 1/2 and 0.866025..., which is numerically sqrt(3)/2. */
Chris@82 177 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 178 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 179 {
Chris@82 180 INT m;
/* NOTE(review): MAKE_VOLATILE_STRIDE is an opaque macro evaluated in the loop step; its purpose is not visible here. */
Chris@82 181 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) {
/* Twelve intermediate sums/differences produced by the three input sections and consumed by the two output sections. */
Chris@82 182 E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC;
Chris@82 183 {
/* (Rp[0], Rm[0]) is used as loaded; (Ip[WS(rs,1)], Im[WS(rs,1)]) is combined with (W[4], W[5]). */
Chris@82 184 E T1, TN, T6, TM;
Chris@82 185 T1 = Rp[0];
Chris@82 186 TN = Rm[0];
Chris@82 187 {
Chris@82 188 E T3, T5, T2, T4;
Chris@82 189 T3 = Ip[WS(rs, 1)];
Chris@82 190 T5 = Im[WS(rs, 1)];
Chris@82 191 T2 = W[4];
Chris@82 192 T4 = W[5];
/* T6 = W[4]*Ip + W[5]*Im and TM = W[4]*Im - W[5]*Ip (given the assumed FMA/FNMS meaning). */
Chris@82 193 T6 = FMA(T2, T3, T4 * T5);
Chris@82 194 TM = FNMS(T4, T3, T2 * T5);
Chris@82 195 }
/* Sums and differences of the unscaled pair and the scaled pair. */
Chris@82 196 T7 = T1 - T6;
Chris@82 197 TS = TN - TM;
Chris@82 198 Tv = T1 + T6;
Chris@82 199 TO = TM + TN;
Chris@82 200 }
Chris@82 201 {
/* (Rp[WS(rs,2)], Rm[WS(rs,2)]) combined with (W[6], W[7]) -> (Tn, TD);
   (Ip[0], Im[0]) combined with (W[0], W[1]) -> (Ts, TE). */
Chris@82 202 E Tn, TD, Ts, TE;
Chris@82 203 {
Chris@82 204 E Tk, Tm, Tj, Tl;
Chris@82 205 Tk = Rp[WS(rs, 2)];
Chris@82 206 Tm = Rm[WS(rs, 2)];
Chris@82 207 Tj = W[6];
Chris@82 208 Tl = W[7];
Chris@82 209 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 210 TD = FNMS(Tl, Tk, Tj * Tm);
Chris@82 211 }
Chris@82 212 {
Chris@82 213 E Tp, Tr, To, Tq;
Chris@82 214 Tp = Ip[0];
Chris@82 215 Tr = Im[0];
Chris@82 216 To = W[0];
Chris@82 217 Tq = W[1];
Chris@82 218 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 219 TE = FNMS(Tq, Tp, To * Tr);
Chris@82 220 }
/* Note TJ is TE - TD (second minus first), unlike the other differences in this section. */
Chris@82 221 Tt = Tn - Ts;
Chris@82 222 TJ = TE - TD;
Chris@82 223 Tx = Tn + Ts;
Chris@82 224 TF = TD + TE;
Chris@82 225 }
Chris@82 226 {
/* (Rp[WS(rs,1)], Rm[WS(rs,1)]) combined with (W[2], W[3]) -> (Tc, TA);
   (Ip[WS(rs,2)], Im[WS(rs,2)]) combined with (W[8], W[9]) -> (Th, TB). */
Chris@82 227 E Tc, TA, Th, TB;
Chris@82 228 {
Chris@82 229 E T9, Tb, T8, Ta;
Chris@82 230 T9 = Rp[WS(rs, 1)];
Chris@82 231 Tb = Rm[WS(rs, 1)];
Chris@82 232 T8 = W[2];
Chris@82 233 Ta = W[3];
Chris@82 234 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 235 TA = FNMS(Ta, T9, T8 * Tb);
Chris@82 236 }
Chris@82 237 {
Chris@82 238 E Te, Tg, Td, Tf;
Chris@82 239 Te = Ip[WS(rs, 2)];
Chris@82 240 Tg = Im[WS(rs, 2)];
Chris@82 241 Td = W[8];
Chris@82 242 Tf = W[9];
Chris@82 243 Th = FMA(Td, Te, Tf * Tg);
Chris@82 244 TB = FNMS(Tf, Te, Td * Tg);
Chris@82 245 }
Chris@82 246 Ti = Tc - Th;
Chris@82 247 TI = TA - TB;
Chris@82 248 Tw = Tc + Th;
Chris@82 249 TC = TA + TB;
Chris@82 250 }
/* All twelve inputs have been loaded into locals by this point; the two
   sections below each write six outputs back into Rp/Ip/Rm/Im. */
Chris@82 251 {
/* Outputs built from the "difference" terms T7, TS, Ti, Tt, TI, TJ:
   writes Rm[WS(rs,2)], Rp[WS(rs,1)], Rm[0], then Im[WS(rs,2)], Ip[WS(rs,1)], Im[0]. */
Chris@82 252 E TK, Tu, TH, TT, TR, TU;
Chris@82 253 TK = KP866025403 * (TI + TJ);
Chris@82 254 Tu = Ti + Tt;
Chris@82 255 TH = FNMS(KP500000000, Tu, T7);
Chris@82 256 Rm[WS(rs, 2)] = T7 + Tu;
Chris@82 257 Rp[WS(rs, 1)] = TH + TK;
Chris@82 258 Rm[0] = TH - TK;
Chris@82 259 TT = KP866025403 * (Tt - Ti);
Chris@82 260 TR = TJ - TI;
Chris@82 261 TU = FMA(KP500000000, TR, TS);
Chris@82 262 Im[WS(rs, 2)] = TR - TS;
Chris@82 263 Ip[WS(rs, 1)] = TT + TU;
Chris@82 264 Im[0] = TT - TU;
Chris@82 265 }
Chris@82 266 {
/* Outputs built from the "sum" terms Tv, TO, Tw, Tx, TC, TF:
   writes Rp[0], Rm[WS(rs,1)], Rp[WS(rs,2)], then Ip[0], Ip[WS(rs,2)], Im[WS(rs,1)]. */
Chris@82 267 E TG, Ty, Tz, TP, TL, TQ;
Chris@82 268 TG = KP866025403 * (TC - TF);
Chris@82 269 Ty = Tw + Tx;
Chris@82 270 Tz = FNMS(KP500000000, Ty, Tv);
Chris@82 271 Rp[0] = Tv + Ty;
Chris@82 272 Rm[WS(rs, 1)] = Tz + TG;
Chris@82 273 Rp[WS(rs, 2)] = Tz - TG;
Chris@82 274 TP = KP866025403 * (Tw - Tx);
Chris@82 275 TL = TC + TF;
Chris@82 276 TQ = FNMS(KP500000000, TL, TO);
Chris@82 277 Ip[0] = TL + TO;
Chris@82 278 Ip[WS(rs, 2)] = TP + TQ;
Chris@82 279 Im[WS(rs, 1)] = TP - TQ;
Chris@82 280 }
Chris@82 281 }
Chris@82 282 }
Chris@82 283 }
Chris@82 284
/* Two-entry tw_instr table that the hc2c_desc initializer in this branch points at.
   NOTE(review): presumably {TW_FULL, 1, 6} requests the complete multiplier set for
   size 6 and {TW_NEXT, 1, 0} ends the list -- confirm against the tw_instr definition,
   which is not visible in this file. */
Chris@82 285 static const tw_instr twinstr[] = {
Chris@82 286 {TW_FULL, 1, 6},
Chris@82 287 {TW_NEXT, 1, 0}
Chris@82 288 };
Chris@82 289
/* Descriptor passed to the registration call: the value 6, the name string "hc2cf_6",
   the twinstr table, &GENUS, and the counts {32, 14, 14, 0}. The first three counts equal
   the "32 additions, 14 multiplications, 14 fused multiply/add" quoted in this variant's
   header comment. NOTE(review): the leading 6 is presumably the transform size and the
   meaning of the trailing 0 is not visible here -- confirm against hc2c_desc. */
Chris@82 290 static const hc2c_desc desc = { 6, "hc2cf_6", twinstr, &GENUS, {32, 14, 14, 0} };
Chris@82 291
/* Externally visible entry point of this file: hands the static hc2cf_6 kernel and its
   descriptor to X(khc2c_register) on planner p, tagged with HC2C_VIA_RDFT. */
Chris@82 292 void X(codelet_hc2cf_6) (planner *p) {
Chris@82 293 X(khc2c_register) (p, hc2cf_6, &desc, HC2C_VIA_RDFT);
Chris@82 294 }
Chris@82 295 #endif