annotate src/fftw-3.3.5/rdft/scalar/r2cf/hf_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:15 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2hc.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hf_6 -include hf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 46 FP additions, 32 FP multiplications,
Chris@42 32 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
Chris@42 33 * 47 stack variables, 2 constants, and 24 memory accesses
Chris@42 34 */
Chris@42 35 #include "hf.h"
Chris@42 36
/*
 * hf_6 (HAVE_FMA variant): size-6 codelet emitted by gen_hc2hc (see the
 * "Generated by" line above: -n 6 -dit).
 *
 * For each m in [mb, me) the loop reads the 12 values cr[0], ci[0] and
 * cr[WS(rs, k)], ci[WS(rs, k)] for k = 1..5, combines input k (k = 1..5)
 * with the coefficient pair (W[2k-2], W[2k-1]), and writes 12 results back
 * over the same cr/ci locations.  Between iterations cr advances by ms, ci
 * moves back by ms and W advances by 10; before the first iteration W is
 * offset by (mb - 1) * 10.
 *
 * NOTE(review): the formula comments below assume FFTW's usual macro
 * meanings FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and
 * FMS(a,b,c) = a*b - c.  These macros, as well as DK, WS, E, R, INT, stride
 * and MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm there.
 */
Chris@42 37 static void hf_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@42 41 {
Chris@42 42 INT m;
Chris@42 43 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
/* T11..T14 are declared here because they are still needed after the inner scopes close. */
Chris@42 44 E T11, T12, T14, T13;
Chris@42 45 {
Chris@42 46 E T1, TV, TX, T7, Tn, Tq, TO, TR, TB, Tl, To, TH, Tt, Tw, Ts;
Chris@42 47 E Tp, Tv;
/* Input 0 is used as-is (no W coefficient is applied to it). */
Chris@42 48 T1 = cr[0];
Chris@42 49 TV = ci[0];
Chris@42 50 {
Chris@42 51 E T3, T6, T2, T5;
Chris@42 52 T3 = cr[WS(rs, 3)];
Chris@42 53 T6 = ci[WS(rs, 3)];
Chris@42 54 T2 = W[4];
Chris@42 55 T5 = W[5];
Chris@42 56 {
Chris@42 57 E Ta, Td, Tg, TM, Tb, Tj, Tf, Tc, Ti, TW, T4, T9;
Chris@42 58 Ta = cr[WS(rs, 2)];
Chris@42 59 Td = ci[WS(rs, 2)];
Chris@42 60 TW = T2 * T6;
Chris@42 61 T4 = T2 * T3;
Chris@42 62 T9 = W[2];
Chris@42 63 Tg = cr[WS(rs, 5)];
/* Input 3 with (W[4], W[5]): T7 = W[4]*cr3 + W[5]*ci3, TX = W[4]*ci3 - W[5]*cr3
   (under the macro meanings assumed in the header comment). */
Chris@42 64 TX = FNMS(T5, T3, TW);
Chris@42 65 T7 = FMA(T5, T6, T4);
Chris@42 66 TM = T9 * Td;
Chris@42 67 Tb = T9 * Ta;
Chris@42 68 Tj = ci[WS(rs, 5)];
Chris@42 69 Tf = W[8];
Chris@42 70 Tc = W[3];
Chris@42 71 Ti = W[9];
Chris@42 72 {
Chris@42 73 E TN, Te, TL, Tk, TK, Th, Tm;
Chris@42 74 Tn = cr[WS(rs, 4)];
Chris@42 75 TK = Tf * Tj;
Chris@42 76 Th = Tf * Tg;
/* Input 2 with (W[2], W[3]) gives (Te, TN); input 5 with (W[8], W[9]) gives (Tk, TL),
   using the same pair of formulas as for input 3. */
Chris@42 77 TN = FNMS(Tc, Ta, TM);
Chris@42 78 Te = FMA(Tc, Td, Tb);
Chris@42 79 TL = FNMS(Ti, Tg, TK);
Chris@42 80 Tk = FMA(Ti, Tj, Th);
Chris@42 81 Tq = ci[WS(rs, 4)];
Chris@42 82 Tm = W[6];
/* Sums and differences of the processed inputs 2 and 5. */
Chris@42 83 TO = TL - TN;
Chris@42 84 TR = TN + TL;
Chris@42 85 TB = Te + Tk;
Chris@42 86 Tl = Te - Tk;
Chris@42 87 To = Tm * Tn;
Chris@42 88 TH = Tm * Tq;
Chris@42 89 }
Chris@42 90 Tt = cr[WS(rs, 1)];
Chris@42 91 Tw = ci[WS(rs, 1)];
Chris@42 92 Ts = W[0];
Chris@42 93 Tp = W[7];
Chris@42 94 Tv = W[1];
Chris@42 95 }
Chris@42 96 }
Chris@42 97 {
Chris@42 98 E TA, T8, TI, Tr, TG, Tx, TF, Tu;
/* Sum and difference of input 0 and the processed input 3 (cr-derived parts). */
Chris@42 99 TA = T1 + T7;
Chris@42 100 T8 = T1 - T7;
Chris@42 101 TF = Ts * Tw;
Chris@42 102 Tu = Ts * Tt;
/* Input 4 with (W[6], W[7]) gives (Tr, TI); input 1 with (W[0], W[1]) gives (Tx, TG). */
Chris@42 103 TI = FNMS(Tp, Tn, TH);
Chris@42 104 Tr = FMA(Tp, Tq, To);
Chris@42 105 TG = FNMS(Tv, Tt, TF);
Chris@42 106 Tx = FMA(Tv, Tw, Tu);
Chris@42 107 {
Chris@42 108 E TY, TU, TP, TT, TD, T10, Tz, TZ, TQ, TE;
/* Sum and difference of input 0 and the processed input 3 (ci-derived parts). */
Chris@42 109 T11 = TX + TV;
Chris@42 110 TY = TV - TX;
Chris@42 111 {
Chris@42 112 E TJ, TS, TC, Ty;
/* Sums and differences of the processed inputs 4 and 1, then their
   combinations with the corresponding input 2/5 terms. */
Chris@42 113 TJ = TG - TI;
Chris@42 114 TS = TI + TG;
Chris@42 115 TC = Tr + Tx;
Chris@42 116 Ty = Tr - Tx;
Chris@42 117 TU = TO + TJ;
Chris@42 118 TP = TJ - TO;
Chris@42 119 TT = TR - TS;
Chris@42 120 T12 = TR + TS;
Chris@42 121 T14 = TB - TC;
Chris@42 122 TD = TB + TC;
Chris@42 123 T10 = Ty - Tl;
Chris@42 124 Tz = Tl + Ty;
Chris@42 125 TZ = FMA(KP500000000, TU, TY);
Chris@42 126 }
/* Results are stored over the input locations; all loads for this
   iteration were issued above, before the first store. */
Chris@42 127 cr[0] = TA + TD;
Chris@42 128 TQ = FNMS(KP500000000, TD, TA);
Chris@42 129 ci[WS(rs, 2)] = T8 + Tz;
Chris@42 130 TE = FNMS(KP500000000, Tz, T8);
Chris@42 131 cr[WS(rs, 3)] = TU - TY;
Chris@42 132 cr[WS(rs, 2)] = FNMS(KP866025403, TT, TQ);
Chris@42 133 ci[WS(rs, 1)] = FMA(KP866025403, TT, TQ);
Chris@42 134 ci[0] = FNMS(KP866025403, TP, TE);
Chris@42 135 cr[WS(rs, 1)] = FMA(KP866025403, TP, TE);
Chris@42 136 ci[WS(rs, 4)] = FMA(KP866025403, T10, TZ);
Chris@42 137 cr[WS(rs, 5)] = FMS(KP866025403, T10, TZ);
Chris@42 138 }
Chris@42 139 }
Chris@42 140 }
/* Remaining three outputs, built from the values kept in T11, T12 and T14. */
Chris@42 141 ci[WS(rs, 5)] = T12 + T11;
Chris@42 142 T13 = FNMS(KP500000000, T12, T11);
Chris@42 143 ci[WS(rs, 3)] = FMA(KP866025403, T14, T13);
Chris@42 144 cr[WS(rs, 4)] = FMS(KP866025403, T14, T13);
Chris@42 145 }
Chris@42 146 }
Chris@42 147 }
Chris@42 148
/*
 * Twiddle instruction table referenced by the desc initialiser that follows.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this file;
 * presumably the first entry requests the full set of coefficients for size 6
 * and the second terminates the list -- confirm against their definitions.
 */
Chris@42 149 static const tw_instr twinstr[] = {
Chris@42 150 {TW_FULL, 1, 6},
Chris@42 151 {TW_NEXT, 1, 0}
Chris@42 152 };
Chris@42 153
/*
 * Descriptor handed to X(khc2hc_register) together with hf_6: it holds the
 * value 6, the name "hf_6", the twinstr table, &GENUS and the counts
 * {24, 10, 22, 0}.  The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in the generated header
 * comment of this HAVE_FMA variant.  The layout of hc2hc_desc and the meaning
 * of the fourth count are defined outside this file.
 */
Chris@42 154 static const hc2hc_desc desc = { 6, "hf_6", twinstr, &GENUS, {24, 10, 22, 0} };
Chris@42 155
/*
 * Registration entry point: passes the planner p, the hf_6 function and its
 * descriptor to X(khc2hc_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * name-prefixing macro) -- confirm.
 */
Chris@42 156 void X(codelet_hf_6) (planner *p) {
Chris@42 157 X(khc2hc_register) (p, hf_6, &desc);
Chris@42 158 }
Chris@42 159 #else /* HAVE_FMA */
Chris@42 160
Chris@42 161 /* Generated by: ../../../genfft/gen_hc2hc.native -compact -variables 4 -pipeline-latency 4 -n 6 -dit -name hf_6 -include hf.h */
Chris@42 162
Chris@42 163 /*
Chris@42 164 * This function contains 46 FP additions, 28 FP multiplications,
Chris@42 165 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
Chris@42 166 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@42 167 */
Chris@42 168 #include "hf.h"
Chris@42 169
/*
 * hf_6 (variant compiled when HAVE_FMA is not defined): size-6 codelet
 * emitted by gen_hc2hc (see the "Generated by" line above: -n 6 -dit).
 *
 * For each m in [mb, me) the loop reads the 12 values cr[0], ci[0] and
 * cr[WS(rs, k)], ci[WS(rs, k)] for k = 1..5, combines input k (k = 1..5)
 * with the coefficient pair (W[2k-2], W[2k-1]), and writes 12 results back
 * over the same cr/ci locations.  Between iterations cr advances by ms, ci
 * moves back by ms and W advances by 10; before the first iteration W is
 * offset by (mb - 1) * 10.
 *
 * NOTE(review): the formula comments below assume FFTW's usual macro
 * meanings FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b.  These macros, as
 * well as DK, WS, E, R, INT, stride and MAKE_VOLATILE_STRIDE, are defined
 * outside this file -- confirm there.
 */
Chris@42 170 static void hf_6(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 171 {
Chris@42 172 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@42 173 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 174 {
Chris@42 175 INT m;
Chris@42 176 for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
/* Twelve intermediate values shared between the load stage and the store stage. */
Chris@42 177 E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC;
/* Stage 1: input 0 (used as-is) paired with input 3 processed by (W[4], W[5]). */
Chris@42 178 {
Chris@42 179 E T1, TM, T6, TN;
Chris@42 180 T1 = cr[0];
Chris@42 181 TM = ci[0];
Chris@42 182 {
Chris@42 183 E T3, T5, T2, T4;
Chris@42 184 T3 = cr[WS(rs, 3)];
Chris@42 185 T5 = ci[WS(rs, 3)];
Chris@42 186 T2 = W[4];
Chris@42 187 T4 = W[5];
/* T6 = W[4]*cr3 + W[5]*ci3, TN = W[4]*ci3 - W[5]*cr3 (under the macro
   meanings assumed in the header comment). */
Chris@42 188 T6 = FMA(T2, T3, T4 * T5);
Chris@42 189 TN = FNMS(T4, T3, T2 * T5);
Chris@42 190 }
Chris@42 191 T7 = T1 - T6;
Chris@42 192 TS = TN + TM;
Chris@42 193 Tv = T1 + T6;
Chris@42 194 TO = TM - TN;
Chris@42 195 }
/* Stage 2: input 4 processed by (W[6], W[7]) paired with input 1 processed by (W[0], W[1]). */
Chris@42 196 {
Chris@42 197 E Tn, TE, Ts, TD;
Chris@42 198 {
Chris@42 199 E Tk, Tm, Tj, Tl;
Chris@42 200 Tk = cr[WS(rs, 4)];
Chris@42 201 Tm = ci[WS(rs, 4)];
Chris@42 202 Tj = W[6];
Chris@42 203 Tl = W[7];
Chris@42 204 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@42 205 TE = FNMS(Tl, Tk, Tj * Tm);
Chris@42 206 }
Chris@42 207 {
Chris@42 208 E Tp, Tr, To, Tq;
Chris@42 209 Tp = cr[WS(rs, 1)];
Chris@42 210 Tr = ci[WS(rs, 1)];
Chris@42 211 To = W[0];
Chris@42 212 Tq = W[1];
Chris@42 213 Ts = FMA(To, Tp, Tq * Tr);
Chris@42 214 TD = FNMS(Tq, Tp, To * Tr);
Chris@42 215 }
Chris@42 216 Tt = Tn - Ts;
Chris@42 217 TJ = TE + TD;
Chris@42 218 Tx = Tn + Ts;
Chris@42 219 TF = TD - TE;
Chris@42 220 }
/* Stage 3: input 2 processed by (W[2], W[3]) paired with input 5 processed by (W[8], W[9]). */
Chris@42 221 {
Chris@42 222 E Tc, TA, Th, TB;
Chris@42 223 {
Chris@42 224 E T9, Tb, T8, Ta;
Chris@42 225 T9 = cr[WS(rs, 2)];
Chris@42 226 Tb = ci[WS(rs, 2)];
Chris@42 227 T8 = W[2];
Chris@42 228 Ta = W[3];
Chris@42 229 Tc = FMA(T8, T9, Ta * Tb);
Chris@42 230 TA = FNMS(Ta, T9, T8 * Tb);
Chris@42 231 }
Chris@42 232 {
Chris@42 233 E Te, Tg, Td, Tf;
Chris@42 234 Te = cr[WS(rs, 5)];
Chris@42 235 Tg = ci[WS(rs, 5)];
Chris@42 236 Td = W[8];
Chris@42 237 Tf = W[9];
Chris@42 238 Th = FMA(Td, Te, Tf * Tg);
Chris@42 239 TB = FNMS(Tf, Te, Td * Tg);
Chris@42 240 }
Chris@42 241 Ti = Tc - Th;
Chris@42 242 TI = TA + TB;
Chris@42 243 Tw = Tc + Th;
Chris@42 244 TC = TA - TB;
Chris@42 245 }
/* Stage 4: first six outputs.  All loads for this iteration were issued
   above, so storing over the input locations here is safe. */
Chris@42 246 {
Chris@42 247 E TG, Tu, Tz, TK, Ty, TH;
Chris@42 248 TG = KP866025403 * (TC + TF);
Chris@42 249 Tu = Ti + Tt;
Chris@42 250 Tz = FNMS(KP500000000, Tu, T7);
Chris@42 251 ci[WS(rs, 2)] = T7 + Tu;
Chris@42 252 cr[WS(rs, 1)] = Tz + TG;
Chris@42 253 ci[0] = Tz - TG;
Chris@42 254 TK = KP866025403 * (TI - TJ);
Chris@42 255 Ty = Tw + Tx;
Chris@42 256 TH = FNMS(KP500000000, Ty, Tv);
Chris@42 257 cr[0] = Tv + Ty;
Chris@42 258 ci[WS(rs, 1)] = TH + TK;
Chris@42 259 cr[WS(rs, 2)] = TH - TK;
Chris@42 260 }
/* Stage 5: remaining six outputs. */
Chris@42 261 {
Chris@42 262 E TP, TL, TQ, TR, TT, TU;
Chris@42 263 TP = KP866025403 * (Tt - Ti);
Chris@42 264 TL = TF - TC;
Chris@42 265 TQ = FMA(KP500000000, TL, TO);
Chris@42 266 cr[WS(rs, 3)] = TL - TO;
Chris@42 267 ci[WS(rs, 4)] = TP + TQ;
Chris@42 268 cr[WS(rs, 5)] = TP - TQ;
Chris@42 269 TR = KP866025403 * (Tw - Tx);
Chris@42 270 TT = TI + TJ;
Chris@42 271 TU = FNMS(KP500000000, TT, TS);
Chris@42 272 cr[WS(rs, 4)] = TR - TU;
Chris@42 273 ci[WS(rs, 5)] = TT + TS;
Chris@42 274 ci[WS(rs, 3)] = TR + TU;
Chris@42 275 }
Chris@42 276 }
Chris@42 277 }
Chris@42 278 }
Chris@42 279
/*
 * Twiddle instruction table referenced by the desc initialiser that follows.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this file;
 * presumably the first entry requests the full set of coefficients for size 6
 * and the second terminates the list -- confirm against their definitions.
 */
Chris@42 280 static const tw_instr twinstr[] = {
Chris@42 281 {TW_FULL, 1, 6},
Chris@42 282 {TW_NEXT, 1, 0}
Chris@42 283 };
Chris@42 284
/*
 * Descriptor handed to X(khc2hc_register) together with hf_6: it holds the
 * value 6, the name "hf_6", the twinstr table, &GENUS and the counts
 * {32, 14, 14, 0}.  The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in the generated header
 * comment of this non-FMA variant.  The layout of hc2hc_desc and the meaning
 * of the fourth count are defined outside this file.
 */
Chris@42 285 static const hc2hc_desc desc = { 6, "hf_6", twinstr, &GENUS, {32, 14, 14, 0} };
Chris@42 286
/*
 * Registration entry point: passes the planner p, the hf_6 function and its
 * descriptor to X(khc2hc_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * name-prefixing macro) -- confirm.
 */
Chris@42 287 void X(codelet_hf_6) (planner *p) {
Chris@42 288 X(khc2hc_register) (p, hf_6, &desc);
Chris@42 289 }
Chris@42 290 #endif /* HAVE_FMA */