annotate src/fftw-3.3.5/rdft/scalar/r2cf/hc2cfdft2_4.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:48:57 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 32 FP additions, 24 FP multiplications,
Chris@42 32 * (or, 24 additions, 16 multiplications, 8 fused multiply/add),
Chris@42 33 * 33 stack variables, 1 constants, and 16 memory accesses
Chris@42 34 */
Chris@42 35 #include "hc2cf.h"
Chris@42 36
/*
 * hc2cfdft2_4 -- variant compiled when HAVE_FMA is defined.
 *
 * For every m in [mb, me) the loop body loads two values from each of Rp,
 * Ip, Rm and Im (at offsets 0 and WS(rs, 1)), combines them with the four
 * twiddle values W[0..3], and stores eight results -- each scaled by
 * KP500000000 (0.5) -- back into those same eight locations.
 *
 * Parameters:
 *   Rp, Ip  data pointers, moved forward by ms after each iteration
 *   Rm, Im  data pointers, moved backward by ms after each iteration
 *   W       twiddle table; offset by (mb - 1) * 4 before the first
 *           iteration, then advanced by 4 per iteration
 *   rs      stride handed to WS() to address the second element
 *   mb, me  half-open iteration range
 *   ms      per-iteration pointer step
 *
 * NOTE(review): the inline notes assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; those macros are defined outside this file --
 * confirm.
 */
Chris@42 37 static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 40 {
Chris@42 41 INT m;
/* MAKE_VOLATILE_STRIDE is a project macro evaluated in the loop increment;
   its effect is not visible from this file. */
Chris@42 42 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) {
Chris@42 43 E T1, T5, T2, T4;
/* Load the four twiddle values used by this iteration. */
Chris@42 44 T1 = W[0];
Chris@42 45 T5 = W[3];
Chris@42 46 T2 = W[2];
Chris@42 47 T4 = W[1];
Chris@42 48 {
Chris@42 49 E Tc, T6, Tp, Tj, Tw, Tt, T9, TE, To, TC, Ta, Tr, Tf, Tl, Tm;
Chris@42 50 {
Chris@42 51 E Th, Tb, T3, Ti;
Chris@42 52 Th = Ip[0];
Chris@42 53 Tb = T1 * T5;
Chris@42 54 T3 = T1 * T2;
Chris@42 55 Ti = Im[0];
Chris@42 56 Tl = Rm[0];
/* Tc = T1*T5 - T4*T2 and T6 = T4*T5 + T1*T2: a further coefficient pair
   computed from W[0..3] instead of being loaded (presumably the
   -twiddle-log3 scheme named in the generator command line -- confirm). */
Chris@42 57 Tc = FNMS(T4, T2, Tb);
Chris@42 58 T6 = FMA(T4, T5, T3);
Chris@42 59 Tp = Th + Ti;
Chris@42 60 Tj = Th - Ti;
Chris@42 61 Tm = Rp[0];
Chris@42 62 }
Chris@42 63 {
Chris@42 64 E T7, T8, Td, Tn, Te;
Chris@42 65 T7 = Ip[WS(rs, 1)];
Chris@42 66 T8 = Im[WS(rs, 1)];
Chris@42 67 Td = Rp[WS(rs, 1)];
Chris@42 68 Tw = Tm + Tl;
Chris@42 69 Tn = Tl - Tm;
Chris@42 70 Tt = T7 + T8;
Chris@42 71 T9 = T7 - T8;
Chris@42 72 Te = Rm[WS(rs, 1)];
Chris@42 73 TE = T4 * Tn;
Chris@42 74 To = T1 * Tn;
Chris@42 75 TC = T2 * Tt;
Chris@42 76 Ta = T6 * T9;
Chris@42 77 Tr = Td - Te;
Chris@42 78 Tf = Td + Te;
Chris@42 79 }
Chris@42 80 {
Chris@42 81 E Tq, Tk, TB, Ty, Tu, TI, TG, TF;
Chris@42 82 Tq = FNMS(T4, Tp, To);
Chris@42 83 TF = FMA(T1, Tp, TE);
Chris@42 84 {
Chris@42 85 E Tg, Tx, TD, Ts;
Chris@42 86 Tg = FNMS(Tc, Tf, Ta);
Chris@42 87 Tx = T6 * Tf;
Chris@42 88 TD = FNMS(T5, Tr, TC);
Chris@42 89 Ts = T2 * Tr;
Chris@42 90 Tk = Tg + Tj;
Chris@42 91 TB = Tj - Tg;
Chris@42 92 Ty = FMA(Tc, T9, Tx);
Chris@42 93 Tu = FMA(T5, Tt, Ts);
Chris@42 94 TI = TD + TF;
Chris@42 95 TG = TD - TF;
Chris@42 96 }
Chris@42 97 {
Chris@42 98 E Tz, TH, Tv, TA;
Chris@42 99 Tz = Tw - Ty;
Chris@42 100 TH = Tw + Ty;
Chris@42 101 Tv = Tq - Tu;
Chris@42 102 TA = Tu + Tq;
/* Every input was loaded above, so the eight stores below overwrite the
   source locations only after their values have been consumed. */
Chris@42 103 Rp[0] = KP500000000 * (TH + TI);
Chris@42 104 Rm[WS(rs, 1)] = KP500000000 * (TH - TI);
Chris@42 105 Rm[0] = KP500000000 * (Tz - TA);
Chris@42 106 Im[WS(rs, 1)] = KP500000000 * (Tv - Tk);
Chris@42 107 Ip[0] = KP500000000 * (Tk + Tv);
Chris@42 108 Im[0] = KP500000000 * (TG - TB);
Chris@42 109 Rp[WS(rs, 1)] = KP500000000 * (Tz + TA);
Chris@42 110 Ip[WS(rs, 1)] = KP500000000 * (TB + TG);
Chris@42 111 }
Chris@42 112 }
Chris@42 113 }
Chris@42 114 }
Chris@42 115 }
Chris@42 116 }
Chris@42 117
/*
 * Twiddle instruction table referenced by desc: two TW_CEXP entries
 * followed by a TW_NEXT entry.
 * NOTE(review): presumably the two TW_CEXP entries (last fields 1 and 3)
 * yield the four reals W[0..3] that hc2cfdft2_4 reads per iteration --
 * confirm against the tw_instr definition.
 */
Chris@42 118 static const tw_instr twinstr[] = {
Chris@42 119 {TW_CEXP, 1, 1},
Chris@42 120 {TW_CEXP, 1, 3},
Chris@42 121 {TW_NEXT, 1, 0}
Chris@42 122 };
Chris@42 123
/*
 * Codelet descriptor passed to X(khc2c_register): it bundles the value 4
 * (presumably the transform size, cf. "-n 4" in the generator command --
 * confirm), the codelet name string, the twiddle table and &GENUS.
 * The trailing {24, 16, 8, 0} matches the 24 additions, 16 multiplications
 * and 8 fused multiply/adds quoted in the generator comment for this variant.
 */
Chris@42 124 static const hc2c_desc desc = { 4, "hc2cfdft2_4", twinstr, &GENUS, {24, 16, 8, 0} };
Chris@42 125
/*
 * Registration entry point: passes the hc2cfdft2_4 kernel and its
 * descriptor to X(khc2c_register) for planner p, tagged HC2C_VIA_DFT.
 * X() is a project macro applied to both this function's name and the
 * callee (presumably symbol-name mangling -- confirm).
 */
Chris@42 126 void X(codelet_hc2cfdft2_4) (planner *p) {
Chris@42 127 X(khc2c_register) (p, hc2cfdft2_4, &desc, HC2C_VIA_DFT);
Chris@42 128 }
Chris@42 129 #else /* HAVE_FMA */
Chris@42 130
Chris@42 131 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include hc2cf.h */
Chris@42 132
Chris@42 133 /*
Chris@42 134 * This function contains 32 FP additions, 24 FP multiplications,
Chris@42 135 * (or, 24 additions, 16 multiplications, 8 fused multiply/add),
Chris@42 136 * 24 stack variables, 1 constants, and 16 memory accesses
Chris@42 137 */
Chris@42 138 #include "hc2cf.h"
Chris@42 139
/*
 * hc2cfdft2_4 -- variant compiled when HAVE_FMA is not defined.
 *
 * For every m in [mb, me) the loop body loads two values from each of Rp,
 * Ip, Rm and Im (at offsets 0 and WS(rs, 1)), combines them with the four
 * twiddle values W[0..3], and stores eight results -- each scaled by
 * KP500000000 (0.5) -- back into those same eight locations.
 *
 * Parameters:
 *   Rp, Ip  data pointers, moved forward by ms after each iteration
 *   Rm, Im  data pointers, moved backward by ms after each iteration
 *   W       twiddle table; offset by (mb - 1) * 4 before the first
 *           iteration, then advanced by 4 per iteration
 *   rs      stride handed to WS() to address the second element
 *   mb, me  half-open iteration range
 *   ms      per-iteration pointer step
 *
 * NOTE(review): the inline notes assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; those macros are defined outside this file --
 * confirm.
 */
Chris@42 140 static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 141 {
Chris@42 142 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 143 {
Chris@42 144 INT m;
/* MAKE_VOLATILE_STRIDE is a project macro evaluated in the loop increment;
   its effect is not visible from this file. */
Chris@42 145 for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) {
Chris@42 146 E T1, T3, T2, T4, T5, T9;
/* Load the four twiddle values used by this iteration. */
Chris@42 147 T1 = W[0];
Chris@42 148 T3 = W[1];
Chris@42 149 T2 = W[2];
Chris@42 150 T4 = W[3];
/* T5 = T1*T2 + T3*T4 and T9 = T1*T4 - T3*T2: a further coefficient pair
   computed from W[0..3] instead of being loaded (presumably the
   -twiddle-log3 scheme named in the generator command line -- confirm). */
Chris@42 151 T5 = FMA(T1, T2, T3 * T4);
Chris@42 152 T9 = FNMS(T3, T2, T1 * T4);
Chris@42 153 {
Chris@42 154 E Tg, Tr, Tm, Tx, Td, Tw, Tp, Ts;
Chris@42 155 {
Chris@42 156 E Te, Tf, Tl, Ti, Tj, Tk;
/* Offset-0 inputs: sums and differences, then combined with T1 and T3. */
Chris@42 157 Te = Ip[0];
Chris@42 158 Tf = Im[0];
Chris@42 159 Tl = Te + Tf;
Chris@42 160 Ti = Rm[0];
Chris@42 161 Tj = Rp[0];
Chris@42 162 Tk = Ti - Tj;
Chris@42 163 Tg = Te - Tf;
Chris@42 164 Tr = Tj + Ti;
Chris@42 165 Tm = FNMS(T3, Tl, T1 * Tk);
Chris@42 166 Tx = FMA(T3, Tk, T1 * Tl);
Chris@42 167 }
Chris@42 168 {
Chris@42 169 E T8, To, Tc, Tn;
Chris@42 170 {
Chris@42 171 E T6, T7, Ta, Tb;
/* Offset-WS(rs, 1) inputs: sums and differences. */
Chris@42 172 T6 = Ip[WS(rs, 1)];
Chris@42 173 T7 = Im[WS(rs, 1)];
Chris@42 174 T8 = T6 - T7;
Chris@42 175 To = T6 + T7;
Chris@42 176 Ta = Rp[WS(rs, 1)];
Chris@42 177 Tb = Rm[WS(rs, 1)];
Chris@42 178 Tc = Ta + Tb;
Chris@42 179 Tn = Ta - Tb;
Chris@42 180 }
/* Combine those sums and differences with (T5, T9) and with (T2, T4). */
Chris@42 181 Td = FNMS(T9, Tc, T5 * T8);
Chris@42 182 Tw = FNMS(T4, Tn, T2 * To);
Chris@42 183 Tp = FMA(T2, Tn, T4 * To);
Chris@42 184 Ts = FMA(T5, Tc, T9 * T8);
Chris@42 185 }
/* Every input was loaded above, so the eight stores below overwrite the
   source locations only after their values have been consumed. */
Chris@42 186 {
Chris@42 187 E Th, Tq, Tz, TA;
Chris@42 188 Th = Td + Tg;
Chris@42 189 Tq = Tm - Tp;
Chris@42 190 Ip[0] = KP500000000 * (Th + Tq);
Chris@42 191 Im[WS(rs, 1)] = KP500000000 * (Tq - Th);
Chris@42 192 Tz = Tr + Ts;
Chris@42 193 TA = Tw + Tx;
Chris@42 194 Rm[WS(rs, 1)] = KP500000000 * (Tz - TA);
Chris@42 195 Rp[0] = KP500000000 * (Tz + TA);
Chris@42 196 }
Chris@42 197 {
Chris@42 198 E Tt, Tu, Tv, Ty;
Chris@42 199 Tt = Tr - Ts;
Chris@42 200 Tu = Tp + Tm;
Chris@42 201 Rm[0] = KP500000000 * (Tt - Tu);
Chris@42 202 Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu);
Chris@42 203 Tv = Tg - Td;
Chris@42 204 Ty = Tw - Tx;
Chris@42 205 Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty);
Chris@42 206 Im[0] = KP500000000 * (Ty - Tv);
Chris@42 207 }
Chris@42 208 }
Chris@42 209 }
Chris@42 210 }
Chris@42 211 }
Chris@42 212
/*
 * Twiddle instruction table referenced by desc: two TW_CEXP entries
 * followed by a TW_NEXT entry.
 * NOTE(review): presumably the two TW_CEXP entries (last fields 1 and 3)
 * yield the four reals W[0..3] that hc2cfdft2_4 reads per iteration --
 * confirm against the tw_instr definition.
 */
Chris@42 213 static const tw_instr twinstr[] = {
Chris@42 214 {TW_CEXP, 1, 1},
Chris@42 215 {TW_CEXP, 1, 3},
Chris@42 216 {TW_NEXT, 1, 0}
Chris@42 217 };
Chris@42 218
/*
 * Codelet descriptor passed to X(khc2c_register): it bundles the value 4
 * (presumably the transform size, cf. "-n 4" in the generator command --
 * confirm), the codelet name string, the twiddle table and &GENUS.
 * The trailing {24, 16, 8, 0} matches the 24 additions, 16 multiplications
 * and 8 fused multiply/adds quoted in the generator comment for this variant.
 */
Chris@42 219 static const hc2c_desc desc = { 4, "hc2cfdft2_4", twinstr, &GENUS, {24, 16, 8, 0} };
Chris@42 220
/*
 * Registration entry point: passes the hc2cfdft2_4 kernel and its
 * descriptor to X(khc2c_register) for planner p, tagged HC2C_VIA_DFT.
 * X() is a project macro applied to both this function's name and the
 * callee (presumably symbol-name mangling -- confirm).
 */
Chris@42 221 void X(codelet_hc2cfdft2_4) (planner *p) {
Chris@42 222 X(khc2c_register) (p, hc2cfdft2_4, &desc, HC2C_VIA_DFT);
Chris@42 223 }
Chris@42 224 #endif /* HAVE_FMA */