annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cfdft_4.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:10 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hc2cfdft_4 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 30 FP additions, 20 FP multiplications,
Chris@82 32 * (or, 24 additions, 14 multiplications, 6 fused multiply/add),
Chris@82 33 * 31 stack variables, 1 constants, and 16 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cfdft_4 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) the loop reads two elements (offset 0 and offset
 * WS(rs, 1)) through each of Rp, Ip, Rm and Im, combines them with the six
 * twiddle values W[0]..W[5], and stores eight results back through the same
 * pointers, each multiplied by KP500000000 (0.5).
 *
 * Rp, Ip : advanced by +ms after each iteration.
 * Rm, Im : advanced by -ms after each iteration.
 * W      : starts at W + (mb - 1) * 6 and advances by 6 per iteration.
 * rs     : stride handed to WS() to locate the second element.
 * mb, me : half-open iteration range.
 * ms     : per-iteration pointer step.
 *
 * NOTE(review): FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are macros defined
 * outside this file. The comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b -- confirm against the codelet header.
 */
Chris@82 37 static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 40 {
Chris@82 41 INT m;
Chris@82 42 for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) {
Chris@82 43 E Td, Tl, Tu, Tk, TC, Tf, Tj, T4, Tr, T9, To, T5, Tv, Tp, TA;
Chris@82 44 E Tb, Tc;
/* Difference (Td) and sum (Tl) of Ip[0] and Im[0]. */
Chris@82 45 Tb = Ip[0];
Chris@82 46 Tc = Im[0];
Chris@82 47 Td = Tb - Tc;
Chris@82 48 Tl = Tb + Tc;
Chris@82 49 {
Chris@82 50 E Tg, Th, Ti, T1, Tn;
/* Ti = Rm[0] - Rp[0], Tu = Rp[0] + Rm[0]; TC and Tj are the partial
   products of Ti with W[1] and W[0], finished by FNMS/FMA further down. */
Chris@82 51 Tg = Rm[0];
Chris@82 52 Th = Rp[0];
Chris@82 53 Ti = Tg - Th;
Chris@82 54 Tu = Th + Tg;
Chris@82 55 Tk = W[1];
Chris@82 56 TC = Tk * Ti;
Chris@82 57 Tf = W[0];
Chris@82 58 Tj = Tf * Ti;
Chris@82 59 {
Chris@82 60 E T2, T3, T7, T8;
/* Sums and differences of the elements at offset WS(rs, 1). */
Chris@82 61 T2 = Ip[WS(rs, 1)];
Chris@82 62 T3 = Im[WS(rs, 1)];
Chris@82 63 T4 = T2 - T3;
Chris@82 64 Tr = T2 + T3;
Chris@82 65 T7 = Rp[WS(rs, 1)];
Chris@82 66 T8 = Rm[WS(rs, 1)];
Chris@82 67 T9 = T7 + T8;
Chris@82 68 To = T7 - T8;
Chris@82 69 }
/* Partial products with W[2] and W[4]; W[3] and W[5] are applied below. */
Chris@82 70 T1 = W[2];
Chris@82 71 T5 = T1 * T4;
Chris@82 72 Tv = T1 * T9;
Chris@82 73 Tn = W[4];
Chris@82 74 Tp = Tn * To;
Chris@82 75 TA = Tn * Tr;
Chris@82 76 }
Chris@82 77 {
Chris@82 78 E Tm, TD, Ta, Tw, Ts, TB, T6, Tq;
/* Complete the twiddle products. Under the FMA/FNMS assumption above:
     Tm = W[0]*Ti - W[1]*Tl,   TD = W[0]*Tl + W[1]*Ti
     Ta = W[2]*T4 - W[3]*T9,   Tw = W[2]*T9 + W[3]*T4
     TB = W[4]*Tr - W[5]*To,   Ts = W[4]*To + W[5]*Tr */
Chris@82 79 Tm = FNMS(Tk, Tl, Tj);
Chris@82 80 TD = FMA(Tf, Tl, TC);
Chris@82 81 T6 = W[3];
Chris@82 82 Ta = FNMS(T6, T9, T5);
Chris@82 83 Tw = FMA(T6, T4, Tv);
Chris@82 84 Tq = W[5];
Chris@82 85 Ts = FMA(Tq, Tr, Tp);
Chris@82 86 TB = FNMS(Tq, To, TA);
Chris@82 87 {
Chris@82 88 E Te, Tt, TF, TG;
/* First four outputs: Ip[0], Im[WS(rs, 1)], Rm[WS(rs, 1)], Rp[0]. */
Chris@82 89 Te = Ta + Td;
Chris@82 90 Tt = Tm - Ts;
Chris@82 91 Ip[0] = KP500000000 * (Te + Tt);
Chris@82 92 Im[WS(rs, 1)] = KP500000000 * (Tt - Te);
Chris@82 93 TF = Tu + Tw;
Chris@82 94 TG = TB + TD;
Chris@82 95 Rm[WS(rs, 1)] = KP500000000 * (TF - TG);
Chris@82 96 Rp[0] = KP500000000 * (TF + TG);
Chris@82 97 }
Chris@82 98 {
Chris@82 99 E Tx, Ty, Tz, TE;
/* Remaining four outputs: Rm[0], Rp[WS(rs, 1)], Ip[WS(rs, 1)], Im[0]. */
Chris@82 100 Tx = Tu - Tw;
Chris@82 101 Ty = Ts + Tm;
Chris@82 102 Rm[0] = KP500000000 * (Tx - Ty);
Chris@82 103 Rp[WS(rs, 1)] = KP500000000 * (Tx + Ty);
Chris@82 104 Tz = Td - Ta;
Chris@82 105 TE = TB - TD;
Chris@82 106 Ip[WS(rs, 1)] = KP500000000 * (Tz + TE);
Chris@82 107 Im[0] = KP500000000 * (TE - Tz);
Chris@82 108 }
Chris@82 109 }
Chris@82 110 }
Chris@82 111 }
Chris@82 112 }
Chris@82 113
/*
 * Twiddle instruction table for this codelet; it is referenced from the
 * hc2c_desc initializer that follows.
 * NOTE(review): the meaning of TW_FULL / TW_NEXT and of the two numeric
 * fields is defined by tw_instr outside this file -- confirm there.
 */
Chris@82 114 static const tw_instr twinstr[] = {
Chris@82 115 {TW_FULL, 1, 4},
Chris@82 116 {TW_NEXT, 1, 0}
Chris@82 117 };
Chris@82 118
/*
 * Codelet descriptor: the value 4, the codelet name string, the twiddle
 * table, &GENUS, and the tuple {24, 14, 6, 0}. The first three entries of
 * the tuple match the operation counts quoted in the generated header
 * comment (24 additions, 14 multiplications, 6 fused multiply/add).
 * NOTE(review): the leading 4 is presumably the transform size ("-n 4" on
 * the generator command line); field meanings come from hc2c_desc, which is
 * declared outside this file.
 */
Chris@82 119 static const hc2c_desc desc = { 4, "hc2cfdft_4", twinstr, &GENUS, {24, 14, 6, 0} };
Chris@82 120
/*
 * Registration entry point: passes the hc2cfdft_4 kernel and its descriptor
 * to X(khc2c_register) for planner p, with the HC2C_VIA_DFT flag.
 */
Chris@82 121 void X(codelet_hc2cfdft_4) (planner *p) {
Chris@82 122 X(khc2c_register) (p, hc2cfdft_4, &desc, HC2C_VIA_DFT);
Chris@82 123 }
Chris@82 124 #else
Chris@82 125
Chris@82 126 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 4 -dit -name hc2cfdft_4 -include rdft/scalar/hc2cf.h */
Chris@82 127
Chris@82 128 /*
Chris@82 129 * This function contains 30 FP additions, 20 FP multiplications,
Chris@82 130 * (or, 24 additions, 14 multiplications, 6 fused multiply/add),
Chris@82 131 * 18 stack variables, 1 constants, and 16 memory accesses
Chris@82 132 */
Chris@82 133 #include "rdft/scalar/hc2cf.h"
Chris@82 134
/*
 * hc2cfdft_4 -- variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) the loop reads two elements (offset 0 and offset
 * WS(rs, 1)) through each of Rp, Ip, Rm and Im, combines them with the six
 * twiddle values W[0]..W[5], and stores eight results back through the same
 * pointers, each multiplied by KP500000000 (0.5).
 *
 * Rp, Ip : advanced by +ms after each iteration.
 * Rm, Im : advanced by -ms after each iteration.
 * W      : starts at W + (mb - 1) * 6 and advances by 6 per iteration.
 * rs     : stride handed to WS() to locate the second element.
 * mb, me : half-open iteration range.
 * ms     : per-iteration pointer step.
 *
 * NOTE(review): FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are macros defined
 * outside this file. The comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b -- confirm against the codelet header.
 */
Chris@82 135 static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 136 {
Chris@82 137 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 138 {
Chris@82 139 INT m;
Chris@82 140 for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) {
Chris@82 141 E Tc, Tr, Tk, Tx, T9, Ts, Tp, Tw;
Chris@82 142 {
Chris@82 143 E Ta, Tb, Tj, Tf, Tg, Th, Te, Ti;
/* Offset-0 elements: Tj = Ip[0] + Im[0], Tc = Ip[0] - Im[0],
   Th = Rm[0] - Rp[0], Tr = Rp[0] + Rm[0]. Under the FMA/FNMS assumption:
     Tk = W[0]*Th - W[1]*Tj,   Tx = W[0]*Tj + W[1]*Th */
Chris@82 144 Ta = Ip[0];
Chris@82 145 Tb = Im[0];
Chris@82 146 Tj = Ta + Tb;
Chris@82 147 Tf = Rm[0];
Chris@82 148 Tg = Rp[0];
Chris@82 149 Th = Tf - Tg;
Chris@82 150 Tc = Ta - Tb;
Chris@82 151 Tr = Tg + Tf;
Chris@82 152 Te = W[0];
Chris@82 153 Ti = W[1];
Chris@82 154 Tk = FNMS(Ti, Tj, Te * Th);
Chris@82 155 Tx = FMA(Ti, Th, Te * Tj);
Chris@82 156 }
Chris@82 157 {
Chris@82 158 E T4, To, T8, Tm;
Chris@82 159 {
Chris@82 160 E T2, T3, T6, T7;
/* Sums and differences of the elements at offset WS(rs, 1). */
Chris@82 161 T2 = Ip[WS(rs, 1)];
Chris@82 162 T3 = Im[WS(rs, 1)];
Chris@82 163 T4 = T2 - T3;
Chris@82 164 To = T2 + T3;
Chris@82 165 T6 = Rp[WS(rs, 1)];
Chris@82 166 T7 = Rm[WS(rs, 1)];
Chris@82 167 T8 = T6 + T7;
Chris@82 168 Tm = T6 - T7;
Chris@82 169 }
Chris@82 170 {
Chris@82 171 E T1, T5, Tl, Tn;
/* Twiddle products with W[2..3] and W[4..5]. Under the FMA/FNMS assumption:
     T9 = W[2]*T4 - W[3]*T8,   Ts = W[2]*T8 + W[3]*T4
     Tw = W[4]*To - W[5]*Tm,   Tp = W[4]*Tm + W[5]*To */
Chris@82 172 T1 = W[2];
Chris@82 173 T5 = W[3];
Chris@82 174 T9 = FNMS(T5, T8, T1 * T4);
Chris@82 175 Ts = FMA(T1, T8, T5 * T4);
Chris@82 176 Tl = W[4];
Chris@82 177 Tn = W[5];
Chris@82 178 Tp = FMA(Tl, Tm, Tn * To);
Chris@82 179 Tw = FNMS(Tn, Tm, Tl * To);
Chris@82 180 }
Chris@82 181 }
Chris@82 182 {
Chris@82 183 E Td, Tq, Tz, TA;
/* First four outputs: Ip[0], Im[WS(rs, 1)], Rm[WS(rs, 1)], Rp[0]. */
Chris@82 184 Td = T9 + Tc;
Chris@82 185 Tq = Tk - Tp;
Chris@82 186 Ip[0] = KP500000000 * (Td + Tq);
Chris@82 187 Im[WS(rs, 1)] = KP500000000 * (Tq - Td);
Chris@82 188 Tz = Tr + Ts;
Chris@82 189 TA = Tw + Tx;
Chris@82 190 Rm[WS(rs, 1)] = KP500000000 * (Tz - TA);
Chris@82 191 Rp[0] = KP500000000 * (Tz + TA);
Chris@82 192 }
Chris@82 193 {
Chris@82 194 E Tt, Tu, Tv, Ty;
/* Remaining four outputs: Rm[0], Rp[WS(rs, 1)], Ip[WS(rs, 1)], Im[0]. */
Chris@82 195 Tt = Tr - Ts;
Chris@82 196 Tu = Tp + Tk;
Chris@82 197 Rm[0] = KP500000000 * (Tt - Tu);
Chris@82 198 Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu);
Chris@82 199 Tv = Tc - T9;
Chris@82 200 Ty = Tw - Tx;
Chris@82 201 Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty);
Chris@82 202 Im[0] = KP500000000 * (Ty - Tv);
Chris@82 203 }
Chris@82 204 }
Chris@82 205 }
Chris@82 206 }
Chris@82 207
/*
 * Twiddle instruction table for this codelet; it is referenced from the
 * hc2c_desc initializer that follows.
 * NOTE(review): the meaning of TW_FULL / TW_NEXT and of the two numeric
 * fields is defined by tw_instr outside this file -- confirm there.
 */
Chris@82 208 static const tw_instr twinstr[] = {
Chris@82 209 {TW_FULL, 1, 4},
Chris@82 210 {TW_NEXT, 1, 0}
Chris@82 211 };
Chris@82 212
/*
 * Codelet descriptor: the value 4, the codelet name string, the twiddle
 * table, &GENUS, and the tuple {24, 14, 6, 0}. The first three entries of
 * the tuple match the operation counts quoted in the generated header
 * comment (24 additions, 14 multiplications, 6 fused multiply/add).
 * NOTE(review): the leading 4 is presumably the transform size ("-n 4" on
 * the generator command line); field meanings come from hc2c_desc, which is
 * declared outside this file.
 */
Chris@82 213 static const hc2c_desc desc = { 4, "hc2cfdft_4", twinstr, &GENUS, {24, 14, 6, 0} };
Chris@82 214
/*
 * Registration entry point: passes the hc2cfdft_4 kernel and its descriptor
 * to X(khc2c_register) for planner p, with the HC2C_VIA_DFT flag.
 */
Chris@82 215 void X(codelet_hc2cfdft_4) (planner *p) {
Chris@82 216 X(khc2c_register) (p, hc2cfdft_4, &desc, HC2C_VIA_DFT);
Chris@82 217 }
Chris@82 218 #endif