annotate src/fftw-3.3.8/rdft/scalar/r2cf/hf_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:28 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hf_6 -include rdft/scalar/hf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@82 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@82 33 * 31 stack variables, 2 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hf.h"
Chris@82 36
Chris@82 37 static void hf_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* FMA variant of the size-6 hc2hc DIT codelet: for each m in [mb, me) it reads six (cr, ci) pairs at WS(rs, 0..5), scales pairs 1..5 by twiddle values from W, and overwrites all twelve cr/ci slots in place. */
Chris@82 38 {
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* one half */
Chris@82 41 {
Chris@82 42 INT m;
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { /* W starts at offset (mb - 1) * 10 and advances by 10 values (five twiddle pairs) per iteration; cr steps forward by ms while ci steps backward by ms. MAKE_VOLATILE_STRIDE is defined outside this file. */
Chris@82 44 E T1, TV, T7, TX, Tl, TR, TB, TO, Ty, TS, TC, TJ; /* values shared between the load stages and the output stages below */
Chris@82 45 T1 = cr[0]; /* pair 0 is used as-is, with no twiddle */
Chris@82 46 TV = ci[0];
Chris@82 47 { /* pair 3 combined with twiddle values W[4], W[5] */
Chris@82 48 E T3, T6, T4, TW, T2, T5;
Chris@82 49 T3 = cr[WS(rs, 3)];
Chris@82 50 T6 = ci[WS(rs, 3)];
Chris@82 51 T2 = W[4];
Chris@82 52 T4 = T2 * T3;
Chris@82 53 TW = T2 * T6;
Chris@82 54 T5 = W[5];
Chris@82 55 T7 = FMA(T5, T6, T4); /* presumably W[4]*cr3 + W[5]*ci3, assuming FMA(a, b, c) = a*b + c -- confirm against the macro definition */
Chris@82 56 TX = FNMS(T5, T3, TW); /* presumably W[4]*ci3 - W[5]*cr3, assuming FNMS(a, b, c) = c - a*b -- confirm against the macro definition */
Chris@82 57 }
Chris@82 58 { /* pairs 2 and 5, combined with (W[2], W[3]) and (W[8], W[9]) respectively */
Chris@82 59 E Ta, Td, Tb, TM, Tg, Tj, Th, TK, T9, Tf;
Chris@82 60 Ta = cr[WS(rs, 2)];
Chris@82 61 Td = ci[WS(rs, 2)];
Chris@82 62 T9 = W[2];
Chris@82 63 Tb = T9 * Ta;
Chris@82 64 TM = T9 * Td;
Chris@82 65 Tg = cr[WS(rs, 5)];
Chris@82 66 Tj = ci[WS(rs, 5)];
Chris@82 67 Tf = W[8];
Chris@82 68 Th = Tf * Tg;
Chris@82 69 TK = Tf * Tj;
Chris@82 70 {
Chris@82 71 E Te, TN, Tk, TL, Tc, Ti;
Chris@82 72 Tc = W[3];
Chris@82 73 Te = FMA(Tc, Td, Tb);
Chris@82 74 TN = FNMS(Tc, Ta, TM);
Chris@82 75 Ti = W[9];
Chris@82 76 Tk = FMA(Ti, Tj, Th);
Chris@82 77 TL = FNMS(Ti, Tg, TK);
Chris@82 78 Tl = Te - Tk; /* difference and sum of the two twiddled pairs, kept for the output stages */
Chris@82 79 TR = TN + TL;
Chris@82 80 TB = Te + Tk;
Chris@82 81 TO = TL - TN;
Chris@82 82 }
Chris@82 83 }
Chris@82 84 { /* pairs 4 and 1, combined with (W[6], W[7]) and (W[0], W[1]) respectively */
Chris@82 85 E Tn, Tq, To, TH, Tt, Tw, Tu, TF, Tm, Ts;
Chris@82 86 Tn = cr[WS(rs, 4)];
Chris@82 87 Tq = ci[WS(rs, 4)];
Chris@82 88 Tm = W[6];
Chris@82 89 To = Tm * Tn;
Chris@82 90 TH = Tm * Tq;
Chris@82 91 Tt = cr[WS(rs, 1)];
Chris@82 92 Tw = ci[WS(rs, 1)];
Chris@82 93 Ts = W[0];
Chris@82 94 Tu = Ts * Tt;
Chris@82 95 TF = Ts * Tw;
Chris@82 96 {
Chris@82 97 E Tr, TI, Tx, TG, Tp, Tv;
Chris@82 98 Tp = W[7];
Chris@82 99 Tr = FMA(Tp, Tq, To);
Chris@82 100 TI = FNMS(Tp, Tn, TH);
Chris@82 101 Tv = W[1];
Chris@82 102 Tx = FMA(Tv, Tw, Tu);
Chris@82 103 TG = FNMS(Tv, Tt, TF);
Chris@82 104 Ty = Tr - Tx;
Chris@82 105 TS = TI + TG;
Chris@82 106 TC = Tr + Tx;
Chris@82 107 TJ = TG - TI;
Chris@82 108 }
Chris@82 109 }
Chris@82 110 { /* writes ci[WS(rs, 2)], cr[WS(rs, 1)] and ci[0] */
Chris@82 111 E TP, T8, Tz, TE;
Chris@82 112 TP = TJ - TO;
Chris@82 113 T8 = T1 - T7;
Chris@82 114 Tz = Tl + Ty;
Chris@82 115 TE = FNMS(KP500000000, Tz, T8);
Chris@82 116 ci[WS(rs, 2)] = T8 + Tz;
Chris@82 117 cr[WS(rs, 1)] = FMA(KP866025403, TP, TE);
Chris@82 118 ci[0] = FNMS(KP866025403, TP, TE);
Chris@82 119 }
Chris@82 120 { /* writes cr[0], ci[WS(rs, 1)] and cr[WS(rs, 2)] */
Chris@82 121 E TT, TA, TD, TQ;
Chris@82 122 TT = TR - TS;
Chris@82 123 TA = T1 + T7;
Chris@82 124 TD = TB + TC;
Chris@82 125 TQ = FNMS(KP500000000, TD, TA);
Chris@82 126 cr[0] = TA + TD;
Chris@82 127 ci[WS(rs, 1)] = FMA(KP866025403, TT, TQ);
Chris@82 128 cr[WS(rs, 2)] = FNMS(KP866025403, TT, TQ);
Chris@82 129 }
Chris@82 130 { /* writes cr[WS(rs, 3)], ci[WS(rs, 4)] and cr[WS(rs, 5)] */
Chris@82 131 E T10, TU, TY, TZ;
Chris@82 132 T10 = Ty - Tl;
Chris@82 133 TU = TO + TJ;
Chris@82 134 TY = TV - TX;
Chris@82 135 TZ = FMA(KP500000000, TU, TY);
Chris@82 136 cr[WS(rs, 3)] = TU - TY;
Chris@82 137 ci[WS(rs, 4)] = FMA(KP866025403, T10, TZ);
Chris@82 138 cr[WS(rs, 5)] = FMS(KP866025403, T10, TZ); /* presumably FMS(a, b, c) = a*b - c -- confirm against the macro definition */
Chris@82 139 }
Chris@82 140 { /* writes cr[WS(rs, 4)], ci[WS(rs, 5)] and ci[WS(rs, 3)] */
Chris@82 141 E T14, T11, T12, T13;
Chris@82 142 T14 = TB - TC;
Chris@82 143 T11 = TX + TV;
Chris@82 144 T12 = TR + TS;
Chris@82 145 T13 = FNMS(KP500000000, T12, T11);
Chris@82 146 cr[WS(rs, 4)] = FMS(KP866025403, T14, T13);
Chris@82 147 ci[WS(rs, 5)] = T12 + T11;
Chris@82 148 ci[WS(rs, 3)] = FMA(KP866025403, T14, T13);
Chris@82 149 }
Chris@82 150 }
Chris@82 151 }
Chris@82 152 }
Chris@82 153
Chris@82 154 static const tw_instr twinstr[] = { /* Twiddle instruction table passed to the codelet descriptor `desc`; the meaning of the TW_FULL / TW_NEXT entries is defined outside this file. */
Chris@82 155 {TW_FULL, 1, 6}, /* presumably requests the full twiddle set for size 6 -- confirm against the tw_instr definition */
Chris@82 156 {TW_NEXT, 1, 0}
Chris@82 157 };
Chris@82 158
Chris@82 159 static const hc2hc_desc desc = { 6, "hf_6", twinstr, &GENUS, {24, 10, 22, 0} }; /* Descriptor handed to the planner at registration: 6 (presumably the radix), the codelet name, the twiddle table, GENUS, and counts {24, 10, 22, 0} that match the additions / multiplications / fused multiply-add figures in the generator comment for this variant. */
Chris@82 160
Chris@82 161 void X(codelet_hf_6) (planner *p) { /* Registration entry point: hands the FMA-variant hf_6 and its descriptor to planner p. */
Chris@82 162 X(khc2hc_register) (p, hf_6, &desc);
Chris@82 163 }
Chris@82 164 #else
Chris@82 165
Chris@82 166 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hf_6 -include rdft/scalar/hf.h */
Chris@82 167
Chris@82 168 /*
Chris@82 169 * This function contains 46 FP additions, 28 FP multiplications,
Chris@82 170 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@82 171 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@82 172 */
Chris@82 173 #include "rdft/scalar/hf.h"
Chris@82 174
Chris@82 175 static void hf_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) /* Non-FMA variant of the size-6 hc2hc DIT codelet: for each m in [mb, me) it reads six (cr, ci) pairs at WS(rs, 0..5), scales pairs 1..5 by twiddle values from W, and overwrites all twelve cr/ci slots in place. */
Chris@82 176 {
Chris@82 177 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* one half */
Chris@82 178 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 179 {
Chris@82 180 INT m;
Chris@82 181 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) { /* W starts at offset (mb - 1) * 10 and advances by 10 values (five twiddle pairs) per iteration; cr steps forward by ms while ci steps backward by ms. MAKE_VOLATILE_STRIDE is defined outside this file. */
Chris@82 182 E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC; /* sums and differences shared between the load stages and the output stages below */
Chris@82 183 { /* pair 0 (no twiddle) combined with pair 3 twiddled by (W[4], W[5]) */
Chris@82 184 E T1, TM, T6, TN;
Chris@82 185 T1 = cr[0];
Chris@82 186 TM = ci[0];
Chris@82 187 {
Chris@82 188 E T3, T5, T2, T4;
Chris@82 189 T3 = cr[WS(rs, 3)];
Chris@82 190 T5 = ci[WS(rs, 3)];
Chris@82 191 T2 = W[4];
Chris@82 192 T4 = W[5];
Chris@82 193 T6 = FMA(T2, T3, T4 * T5); /* presumably W[4]*cr3 + W[5]*ci3, assuming FMA(a, b, c) = a*b + c -- confirm against the macro definition */
Chris@82 194 TN = FNMS(T4, T3, T2 * T5); /* presumably W[4]*ci3 - W[5]*cr3, assuming FNMS(a, b, c) = c - a*b -- confirm against the macro definition */
Chris@82 195 }
Chris@82 196 T7 = T1 - T6;
Chris@82 197 TS = TN + TM;
Chris@82 198 Tv = T1 + T6;
Chris@82 199 TO = TM - TN;
Chris@82 200 }
Chris@82 201 { /* pairs 4 and 1, twiddled by (W[6], W[7]) and (W[0], W[1]) respectively */
Chris@82 202 E Tn, TE, Ts, TD;
Chris@82 203 {
Chris@82 204 E Tk, Tm, Tj, Tl;
Chris@82 205 Tk = cr[WS(rs, 4)];
Chris@82 206 Tm = ci[WS(rs, 4)];
Chris@82 207 Tj = W[6];
Chris@82 208 Tl = W[7];
Chris@82 209 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 210 TE = FNMS(Tl, Tk, Tj * Tm);
Chris@82 211 }
Chris@82 212 {
Chris@82 213 E Tp, Tr, To, Tq;
Chris@82 214 Tp = cr[WS(rs, 1)];
Chris@82 215 Tr = ci[WS(rs, 1)];
Chris@82 216 To = W[0];
Chris@82 217 Tq = W[1];
Chris@82 218 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 219 TD = FNMS(Tq, Tp, To * Tr);
Chris@82 220 }
Chris@82 221 Tt = Tn - Ts;
Chris@82 222 TJ = TE + TD;
Chris@82 223 Tx = Tn + Ts;
Chris@82 224 TF = TD - TE;
Chris@82 225 }
Chris@82 226 { /* pairs 2 and 5, twiddled by (W[2], W[3]) and (W[8], W[9]) respectively */
Chris@82 227 E Tc, TA, Th, TB;
Chris@82 228 {
Chris@82 229 E T9, Tb, T8, Ta;
Chris@82 230 T9 = cr[WS(rs, 2)];
Chris@82 231 Tb = ci[WS(rs, 2)];
Chris@82 232 T8 = W[2];
Chris@82 233 Ta = W[3];
Chris@82 234 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 235 TA = FNMS(Ta, T9, T8 * Tb);
Chris@82 236 }
Chris@82 237 {
Chris@82 238 E Te, Tg, Td, Tf;
Chris@82 239 Te = cr[WS(rs, 5)];
Chris@82 240 Tg = ci[WS(rs, 5)];
Chris@82 241 Td = W[8];
Chris@82 242 Tf = W[9];
Chris@82 243 Th = FMA(Td, Te, Tf * Tg);
Chris@82 244 TB = FNMS(Tf, Te, Td * Tg);
Chris@82 245 }
Chris@82 246 Ti = Tc - Th;
Chris@82 247 TI = TA + TB;
Chris@82 248 Tw = Tc + Th;
Chris@82 249 TC = TA - TB;
Chris@82 250 }
Chris@82 251 { /* writes ci[WS(rs, 2)], cr[WS(rs, 1)], ci[0], then cr[0], ci[WS(rs, 1)], cr[WS(rs, 2)] */
Chris@82 252 E TG, Tu, Tz, TK, Ty, TH;
Chris@82 253 TG = KP866025403 * (TC + TF);
Chris@82 254 Tu = Ti + Tt;
Chris@82 255 Tz = FNMS(KP500000000, Tu, T7);
Chris@82 256 ci[WS(rs, 2)] = T7 + Tu;
Chris@82 257 cr[WS(rs, 1)] = Tz + TG;
Chris@82 258 ci[0] = Tz - TG;
Chris@82 259 TK = KP866025403 * (TI - TJ);
Chris@82 260 Ty = Tw + Tx;
Chris@82 261 TH = FNMS(KP500000000, Ty, Tv);
Chris@82 262 cr[0] = Tv + Ty;
Chris@82 263 ci[WS(rs, 1)] = TH + TK;
Chris@82 264 cr[WS(rs, 2)] = TH - TK;
Chris@82 265 }
Chris@82 266 { /* writes cr[WS(rs, 3)], ci[WS(rs, 4)], cr[WS(rs, 5)], then cr[WS(rs, 4)], ci[WS(rs, 5)], ci[WS(rs, 3)] */
Chris@82 267 E TP, TL, TQ, TR, TT, TU;
Chris@82 268 TP = KP866025403 * (Tt - Ti);
Chris@82 269 TL = TF - TC;
Chris@82 270 TQ = FMA(KP500000000, TL, TO);
Chris@82 271 cr[WS(rs, 3)] = TL - TO;
Chris@82 272 ci[WS(rs, 4)] = TP + TQ;
Chris@82 273 cr[WS(rs, 5)] = TP - TQ;
Chris@82 274 TR = KP866025403 * (Tw - Tx);
Chris@82 275 TT = TI + TJ;
Chris@82 276 TU = FNMS(KP500000000, TT, TS);
Chris@82 277 cr[WS(rs, 4)] = TR - TU;
Chris@82 278 ci[WS(rs, 5)] = TT + TS;
Chris@82 279 ci[WS(rs, 3)] = TR + TU;
Chris@82 280 }
Chris@82 281 }
Chris@82 282 }
Chris@82 283 }
Chris@82 284
Chris@82 285 static const tw_instr twinstr[] = { /* Twiddle instruction table passed to the codelet descriptor `desc`; the meaning of the TW_FULL / TW_NEXT entries is defined outside this file. */
Chris@82 286 {TW_FULL, 1, 6}, /* presumably requests the full twiddle set for size 6 -- confirm against the tw_instr definition */
Chris@82 287 {TW_NEXT, 1, 0}
Chris@82 288 };
Chris@82 289
Chris@82 290 static const hc2hc_desc desc = { 6, "hf_6", twinstr, &GENUS, {32, 14, 14, 0} }; /* Descriptor handed to the planner at registration: 6 (presumably the radix), the codelet name, the twiddle table, GENUS, and counts {32, 14, 14, 0} that match the additions / multiplications / fused multiply-add figures in the generator comment for this variant. */
Chris@82 291
Chris@82 292 void X(codelet_hf_6) (planner *p) { /* Registration entry point: hands the non-FMA hf_6 and its descriptor to planner p. */
Chris@82 293 X(khc2hc_register) (p, hf_6, &desc);
Chris@82 294 }
Chris@82 295 #endif