annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:03 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cf_9 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 38 FP additions, 30 FP multiplications,
Chris@42 32 * (or, 12 additions, 4 multiplications, 26 fused multiply/add),
Chris@42 33 * 57 stack variables, 18 constants, and 18 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_9 (HAVE_FMA variant): straight-line kernel emitted by genfft.
 *
 * Each loop iteration reads nine reals -- R0[0], R0[WS(rs, 1..4)], R1[0] and
 * R1[WS(rs, 1..3)] -- and writes five values through Cr (Cr[0] and
 * Cr[WS(csr, 1..4)]) and four through Ci (Ci[WS(csi, 1..4)]).  Ci[0] is
 * never written by this function.
 *
 * Parameters:
 *   R0, R1   - input pointers; each is advanced by ivs after every iteration.
 *   Cr, Ci   - output pointers; each is advanced by ovs after every iteration.
 *   rs       - stride passed to WS() when indexing R0 and R1.
 *   csr, csi - strides passed to WS() when indexing Cr and Ci respectively.
 *   v        - iteration count; the body runs while the counter is > 0, so
 *              nothing is read or written when v <= 0.
 *   ivs, ovs - per-iteration pointer increments for the inputs and outputs.
 *
 * NOTE(review): judging by the generator command line (gen_r2cf, -n 9) this
 * is presumably a size-9 real-to-complex forward DFT, with R0 and R1 holding
 * the even- and odd-indexed samples and Cr and Ci the real and imaginary
 * outputs -- confirm against r2cf.h and codelet-rdft.h.
 * NOTE(review): DK, E, R, INT, WS, FMA, FMS, FNMS and MAKE_VOLATILE_STRIDE
 * are defined outside this file; their exact semantics are not visible here.
 */
Chris@42 37 static void r2cf_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* The 18 numeric constants used by the arithmetic below. */
Chris@42 39 DK(KP907603734, +0.907603734547952313649323976213898122064543220);
Chris@42 40 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@42 41 DK(KP347296355, +0.347296355333860697703433253538629592000751354);
Chris@42 42 DK(KP666666666, +0.666666666666666666666666666666666666666666667);
Chris@42 43 DK(KP879385241, +0.879385241571816768108218554649462939872416269);
Chris@42 44 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 45 DK(KP673648177, +0.673648177666930348851716626769314796000375677);
Chris@42 46 DK(KP898197570, +0.898197570222573798468955502359086394667167570);
Chris@42 47 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@42 48 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 49 DK(KP203604859, +0.203604859554852403062088995281827210665664861);
Chris@42 50 DK(KP152703644, +0.152703644666139302296566746461370407999248646);
Chris@42 51 DK(KP394930843, +0.394930843634698457567117349190734585290304520);
Chris@42 52 DK(KP968908795, +0.968908795874236621082202410917456709164223497);
Chris@42 53 DK(KP726681596, +0.726681596905677465811651808188092531873167623);
Chris@42 54 DK(KP586256827, +0.586256827714544512072145703099641959914944179);
Chris@42 55 DK(KP184792530, +0.184792530904095372701352047572203755870913560);
Chris@42 56 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 57 {
Chris@42 58 INT i;
/* One pass per transform: count i down from v, stepping the input pointers by ivs and the output pointers by ovs. */
Chris@42 59 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) {
/* Declared at loop scope because they are assigned inside the nested blocks but consumed by the final Cr stores. */
Chris@42 60 E Tp, Tz, Tw, Ts, TA;
Chris@42 61 {
Chris@42 62 E T1, T6, Tb, T7, T4, To, T8, Tc, Td, T2, T3;
/* Load the nine inputs; T4 and To are the sum and difference of R1[WS(rs, 1)] and R0[WS(rs, 3)]. */
Chris@42 63 T1 = R0[0];
Chris@42 64 T2 = R1[WS(rs, 1)];
Chris@42 65 T3 = R0[WS(rs, 3)];
Chris@42 66 T6 = R1[0];
Chris@42 67 Tb = R0[WS(rs, 1)];
Chris@42 68 T7 = R0[WS(rs, 2)];
Chris@42 69 T4 = T2 + T3;
Chris@42 70 To = T3 - T2;
Chris@42 71 T8 = R1[WS(rs, 3)];
Chris@42 72 Tc = R1[WS(rs, 2)];
Chris@42 73 Td = R0[WS(rs, 4)];
Chris@42 74 {
Chris@42 75 E T5, T9, Tk, Te, Ti;
/* Pairwise sums and differences of the remaining two input triples (T6, T7, T8) and (Tb, Tc, Td). */
Chris@42 76 T5 = T1 + T4;
Chris@42 77 Tp = FNMS(KP500000000, T4, T1);
Chris@42 78 T9 = T7 + T8;
Chris@42 79 Tk = T7 - T8;
Chris@42 80 Te = Tc + Td;
Chris@42 81 Ti = Td - Tc;
Chris@42 82 {
Chris@42 83 E Tl, Ta, Tu, Tf, Th;
Chris@42 84 Tl = FMS(KP500000000, T9, T6);
Chris@42 85 Ta = T6 + T9;
Chris@42 86 Tu = FMA(KP184792530, Tk, Ti);
Chris@42 87 Tf = Tb + Te;
Chris@42 88 Th = FNMS(KP500000000, Te, Tb);
Chris@42 89 {
Chris@42 90 E Tq, Ty, Tm, Tt;
Chris@42 91 Tq = FMA(KP586256827, Tl, Ti);
Chris@42 92 Ty = FMA(KP726681596, Tk, Tl);
Chris@42 93 Tm = FNMS(KP968908795, Tl, Tk);
Chris@42 94 Tt = FMA(KP394930843, Th, To);
Chris@42 95 {
Chris@42 96 E Tj, Tx, Tg, Tv;
Chris@42 97 Tj = FNMS(KP152703644, Ti, Th);
Chris@42 98 Tx = FMA(KP203604859, Th, Ti);
/* Tg is the sum of the six inputs other than T1, T2 and T3 (Ta + Tf). */
Chris@42 99 Tg = Ta + Tf;
Chris@42 100 Ci[WS(csi, 3)] = KP866025403 * (Tf - Ta);
Chris@42 101 Tv = FNMS(KP939692620, Tu, Tt);
Chris@42 102 {
Chris@42 103 E TB, Tn, TC, Tr;
Chris@42 104 TB = FMA(KP898197570, Ty, Tx);
Chris@42 105 Tz = FNMS(KP898197570, Ty, Tx);
Chris@42 106 Tw = FNMS(KP673648177, Tm, Tj);
Chris@42 107 Tn = FMA(KP673648177, Tm, Tj);
/* Cr[0] is the plain sum of all nine inputs (T5 + Tg). */
Chris@42 108 Cr[0] = T5 + Tg;
Chris@42 109 Cr[WS(csr, 3)] = FNMS(KP500000000, Tg, T5);
Chris@42 110 Ci[WS(csi, 2)] = KP984807753 * (FNMS(KP879385241, Tv, Tl));
Chris@42 111 Ci[WS(csi, 1)] = -(KP984807753 * (FNMS(KP879385241, To, Tn)));
Chris@42 112 TC = FMA(KP666666666, Tn, TB);
Chris@42 113 Tr = FNMS(KP347296355, Tq, Tk);
Chris@42 114 Ci[WS(csi, 4)] = KP866025403 * (FMA(KP852868531, TC, To));
Chris@42 115 Ts = FNMS(KP907603734, Tr, Th);
Chris@42 116 }
Chris@42 117 }
Chris@42 118 }
Chris@42 119 }
Chris@42 120 }
Chris@42 121 }
/* Remaining Cr outputs, built from the loop-scope temporaries Tp, Tz, Tw and Ts. */
Chris@42 122 Cr[WS(csr, 1)] = FMA(KP852868531, Tz, Tp);
Chris@42 123 TA = FNMS(KP500000000, Tz, Tw);
Chris@42 124 Cr[WS(csr, 2)] = FNMS(KP939692620, Ts, Tp);
Chris@42 125 Cr[WS(csr, 4)] = FMA(KP852868531, TA, Tp);
Chris@42 126 }
Chris@42 127 }
Chris@42 128 }
Chris@42 129
/*
 * Descriptor handed to X(kr2c_register) together with r2cf_9 (HAVE_FMA
 * build).  It carries the value 9, the codelet name string, and an inner
 * initializer whose first three entries (12, 4, 26) equal the addition,
 * multiplication and fused multiply/add counts quoted in the generated
 * comment for this variant.
 * NOTE(review): the kr2c_desc field layout, the meaning of the trailing 0
 * and GENUS are defined outside this file -- confirm there.
 */
Chris@42 130 static const kr2c_desc desc = { 9, "r2cf_9", {12, 4, 26, 0}, &GENUS };
Chris@42 131
/*
 * Registration entry point (HAVE_FMA build): passes the r2cf_9 kernel and
 * its descriptor to X(kr2c_register) for planner p.
 * NOTE(review): X() is a macro defined elsewhere, presumably a symbol-name
 * wrapper -- confirm in the project headers.
 */
Chris@42 132 void X(codelet_r2cf_9) (planner *p) {
Chris@42 133 X(kr2c_register) (p, r2cf_9, &desc);
Chris@42 134 }
Chris@42 135
Chris@42 136 #else /* HAVE_FMA */
Chris@42 137
Chris@42 138 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cf_9 -include r2cf.h */
Chris@42 139
Chris@42 140 /*
Chris@42 141 * This function contains 38 FP additions, 26 FP multiplications,
Chris@42 142 * (or, 21 additions, 9 multiplications, 17 fused multiply/add),
Chris@42 143 * 36 stack variables, 14 constants, and 18 memory accesses
Chris@42 144 */
Chris@42 145 #include "r2cf.h"
Chris@42 146
/*
 * r2cf_9 (variant used when HAVE_FMA is not defined): straight-line kernel
 * emitted by genfft.
 *
 * Each loop iteration reads nine reals -- R0[0], R0[WS(rs, 1..4)], R1[0] and
 * R1[WS(rs, 1..3)] -- and writes five values through Cr (Cr[0] and
 * Cr[WS(csr, 1..4)]) and four through Ci (Ci[WS(csi, 1..4)]).  Ci[0] is
 * never written by this function.
 *
 * Parameters:
 *   R0, R1   - input pointers; each is advanced by ivs after every iteration.
 *   Cr, Ci   - output pointers; each is advanced by ovs after every iteration.
 *   rs       - stride passed to WS() when indexing R0 and R1.
 *   csr, csi - strides passed to WS() when indexing Cr and Ci respectively.
 *   v        - iteration count; the body runs while the counter is > 0, so
 *              nothing is read or written when v <= 0.
 *   ivs, ovs - per-iteration pointer increments for the inputs and outputs.
 *
 * NOTE(review): judging by the generator command line (gen_r2cf, -n 9) this
 * is presumably a size-9 real-to-complex forward DFT, with R0 and R1 holding
 * the even- and odd-indexed samples and Cr and Ci the real and imaginary
 * outputs -- confirm against r2cf.h and codelet-rdft.h.
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS, FNMA and MAKE_VOLATILE_STRIDE
 * are defined outside this file; their exact semantics are not visible here.
 */
Chris@42 147 static void r2cf_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 148 {
/* The 14 numeric constants used by the arithmetic below. */
Chris@42 149 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@42 150 DK(KP296198132, +0.296198132726023843175338011893050938967728390);
Chris@42 151 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@42 152 DK(KP813797681, +0.813797681349373692844693217248393223289101568);
Chris@42 153 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@42 154 DK(KP150383733, +0.150383733180435296639271897612501926072238258);
Chris@42 155 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@42 156 DK(KP663413948, +0.663413948168938396205421319635891297216863310);
Chris@42 157 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@42 158 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@42 159 DK(KP556670399, +0.556670399226419366452912952047023132968291906);
Chris@42 160 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@42 161 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 162 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 163 {
Chris@42 164 INT i;
/* One pass per transform: count i down from v, stepping the input pointers by ivs and the output pointers by ovs. */
Chris@42 165 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) {
Chris@42 166 E T1, T4, Tr, Ta, Tl, Ti, Tf, Tk, Tj, T2, T3, T5, Tg;
/* First input triple: R0[0], R1[WS(rs, 1)], R0[WS(rs, 3)]; T4 and Tr are the sum and difference of the last two. */
Chris@42 167 T1 = R0[0];
Chris@42 168 T2 = R1[WS(rs, 1)];
Chris@42 169 T3 = R0[WS(rs, 3)];
Chris@42 170 T4 = T2 + T3;
Chris@42 171 Tr = T3 - T2;
Chris@42 172 {
/* Second input triple: R1[0], R0[WS(rs, 2)], R1[WS(rs, 3)]; yields Ta (sum of all three), Tl (difference) and Ti. */
Chris@42 173 E T6, T7, T8, T9;
Chris@42 174 T6 = R1[0];
Chris@42 175 T7 = R0[WS(rs, 2)];
Chris@42 176 T8 = R1[WS(rs, 3)];
Chris@42 177 T9 = T7 + T8;
Chris@42 178 Ta = T6 + T9;
Chris@42 179 Tl = T8 - T7;
Chris@42 180 Ti = FNMS(KP500000000, T9, T6);
Chris@42 181 }
Chris@42 182 {
/* Third input triple: R0[WS(rs, 1)], R1[WS(rs, 2)], R0[WS(rs, 4)]; yields Tf (sum of all three), Tj (difference) and Tk. */
Chris@42 183 E Tb, Tc, Td, Te;
Chris@42 184 Tb = R0[WS(rs, 1)];
Chris@42 185 Tc = R1[WS(rs, 2)];
Chris@42 186 Td = R0[WS(rs, 4)];
Chris@42 187 Te = Tc + Td;
Chris@42 188 Tf = Tb + Te;
Chris@42 189 Tk = FNMS(KP500000000, Te, Tb);
Chris@42 190 Tj = Td - Tc;
Chris@42 191 }
/* Outputs that depend only on the three triple sums: Ci[WS(csi, 3)], Cr[WS(csr, 3)] and Cr[0] (the sum of all nine inputs). */
Chris@42 192 Ci[WS(csi, 3)] = KP866025403 * (Tf - Ta);
Chris@42 193 T5 = T1 + T4;
Chris@42 194 Tg = Ta + Tf;
Chris@42 195 Cr[WS(csr, 3)] = FNMS(KP500000000, Tg, T5);
Chris@42 196 Cr[0] = T5 + Tg;
Chris@42 197 {
/* Remaining six outputs, combined from Th, Ti, Tj, Tk, Tl and Tr with the constants above. */
Chris@42 198 E Tt, Th, Tm, Tn, To, Tp, Tq, Ts;
Chris@42 199 Tt = KP866025403 * Tr;
Chris@42 200 Th = FNMS(KP500000000, T4, T1);
Chris@42 201 Tm = FMA(KP766044443, Ti, KP556670399 * Tl);
Chris@42 202 Tn = FMA(KP173648177, Tk, KP852868531 * Tj);
Chris@42 203 To = Tm + Tn;
Chris@42 204 Tp = FNMS(KP642787609, Ti, KP663413948 * Tl);
Chris@42 205 Tq = FNMS(KP984807753, Tk, KP150383733 * Tj);
Chris@42 206 Ts = Tp + Tq;
Chris@42 207 Cr[WS(csr, 1)] = Th + To;
Chris@42 208 Ci[WS(csi, 1)] = Tt + Ts;
Chris@42 209 Cr[WS(csr, 4)] = FMA(KP866025403, Tp - Tq, Th) - (KP500000000 * To);
Chris@42 210 Ci[WS(csi, 4)] = FNMS(KP500000000, Ts, KP866025403 * (Tr + (Tn - Tm)));
Chris@42 211 Ci[WS(csi, 2)] = FNMS(KP342020143, Tk, KP813797681 * Tj) + FNMA(KP150383733, Tl, KP984807753 * Ti) - Tt;
Chris@42 212 Cr[WS(csr, 2)] = FMA(KP173648177, Ti, Th) + FNMA(KP296198132, Tj, KP939692620 * Tk) - (KP852868531 * Tl);
Chris@42 213 }
Chris@42 214 }
Chris@42 215 }
Chris@42 216 }
Chris@42 217
/*
 * Descriptor handed to X(kr2c_register) together with r2cf_9 (build without
 * HAVE_FMA).  It carries the value 9, the codelet name string, and an inner
 * initializer whose first three entries (21, 9, 17) equal the addition,
 * multiplication and fused multiply/add counts quoted in the generated
 * comment for this variant.
 * NOTE(review): the kr2c_desc field layout, the meaning of the trailing 0
 * and GENUS are defined outside this file -- confirm there.
 */
Chris@42 218 static const kr2c_desc desc = { 9, "r2cf_9", {21, 9, 17, 0}, &GENUS };
Chris@42 219
/*
 * Registration entry point (build without HAVE_FMA): passes the r2cf_9
 * kernel and its descriptor to X(kr2c_register) for planner p.
 * NOTE(review): X() is a macro defined elsewhere, presumably a symbol-name
 * wrapper -- confirm in the project headers.
 */
Chris@42 220 void X(codelet_r2cf_9) (planner *p) {
Chris@42 221 X(kr2c_register) (p, r2cf_9, &desc);
Chris@42 222 }
Chris@42 223
Chris@42 224 #endif /* HAVE_FMA */