annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:03 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cf_10 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 34 FP additions, 14 FP multiplications,
Chris@42 32 * (or, 24 additions, 4 multiplications, 10 fused multiply/add),
Chris@42 33 * 29 stack variables, 4 constants, and 20 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_10 (HAVE_FMA variant): kernel emitted by gen_r2cf with "-n 10".
 *
 * Each loop pass reads ten values -- R0[0], R0[WS(rs, 1..4)], R1[0] and
 * R1[WS(rs, 1..4)] -- and writes ten values: Cr[0], Cr[WS(csr, 1..5)] and
 * Ci[WS(csi, 1..4)].  The loop counter starts at v and the body runs while
 * it is > 0; after each pass R0 and R1 advance by ivs and Cr and Ci advance
 * by ovs.
 *
 * The inputs are consumed as five pairs (R0[k], R1[(k + 2) mod 5]); every
 * pair yields one sum and one difference.  Sums feed Cr[0], Cr[2], Cr[4],
 * Ci[2] and Ci[4]; differences feed Cr[1], Cr[3], Cr[5], Ci[1] and Ci[3].
 *
 * NOTE(review): the name and generator flags suggest a size-10
 * real-to-complex forward DFT with R0/R1 holding the even/odd-indexed
 * samples; that cannot be verified from this file alone -- confirm against
 * r2cf.h / codelet-rdft.h.  E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file.  FMA(a, b, c) and
 * FNMS(a, b, c) are presumably a*b + c and c - a*b -- TODO confirm.
 */
Chris@42 37 static void r2cf_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Constant values numerically match sqrt(5)/4, 1/4, (sqrt(5)-1)/2 and sin(72 deg). */
Chris@42 39 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 40 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 42 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 43 {
Chris@42 44 INT i;
Chris@42 45 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) {
Chris@42 46 E Tt, T3, T7, Tq, T6, Tv, Tp, Tm, Th, T8, T1, T2, T9, Tr;
/* Pair (R0[0], R1[2]): sum Tt and difference T3 are formed further down. */
Chris@42 47 T1 = R0[0];
Chris@42 48 T2 = R1[WS(rs, 2)];
Chris@42 49 {
Chris@42 50 E Te, Tn, Td, Tf, Tb, Tc;
/* Pairs (R0[2], R1[4]) -> Tn/Td and (R0[3], R1[0]) -> To/Tg (sum/difference). */
Chris@42 51 Tb = R0[WS(rs, 2)];
Chris@42 52 Tc = R1[WS(rs, 4)];
Chris@42 53 Te = R0[WS(rs, 3)];
Chris@42 54 Tt = T1 + T2;
Chris@42 55 T3 = T1 - T2;
Chris@42 56 Tn = Tb + Tc;
Chris@42 57 Td = Tb - Tc;
Chris@42 58 Tf = R1[0];
Chris@42 59 {
Chris@42 60 E T4, T5, To, Tg;
/* Pair (R0[1], R1[3]) -> Tq/T6; R0[4] and R1[1] are loaded here and combined after this scope. */
Chris@42 61 T4 = R0[WS(rs, 1)];
Chris@42 62 T5 = R1[WS(rs, 3)];
Chris@42 63 T7 = R0[WS(rs, 4)];
Chris@42 64 To = Te + Tf;
Chris@42 65 Tg = Te - Tf;
Chris@42 66 Tq = T4 + T5;
Chris@42 67 T6 = T4 - T5;
Chris@42 68 Tv = Tn + To;
Chris@42 69 Tp = Tn - To;
Chris@42 70 Tm = Tg - Td;
Chris@42 71 Th = Td + Tg;
Chris@42 72 T8 = R1[WS(rs, 1)];
Chris@42 73 }
Chris@42 74 }
/* Pair (R0[4], R1[1]): difference T9 and sum Tr. */
Chris@42 75 T9 = T7 - T8;
Chris@42 76 Tr = T7 + T8;
Chris@42 77 {
Chris@42 78 E Ty, Tk, Tx, Tj, Tu, Ts;
Chris@42 79 Tu = Tq + Tr;
Chris@42 80 Ts = Tq - Tr;
Chris@42 81 {
Chris@42 82 E Ta, Tl, Tw, Ti;
Chris@42 83 Ta = T6 + T9;
Chris@42 84 Tl = T6 - T9;
/* Ci[4] and Ci[2] are built from the pair sums (Tp, Ts). */
Chris@42 85 Ci[WS(csi, 4)] = KP951056516 * (FMA(KP618033988, Tp, Ts));
Chris@42 86 Ci[WS(csi, 2)] = KP951056516 * (FNMS(KP618033988, Ts, Tp));
Chris@42 87 Ty = Tu - Tv;
Chris@42 88 Tw = Tu + Tv;
/* Ci[3] and Ci[1] are built from the pair differences (Tl, Tm); Ci[1] is negated. */
Chris@42 89 Ci[WS(csi, 3)] = KP951056516 * (FMA(KP618033988, Tl, Tm));
Chris@42 90 Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP618033988, Tm, Tl)));
Chris@42 91 Tk = Ta - Th;
Chris@42 92 Ti = Ta + Th;
/* Cr[0] is the total of all five pair sums; Cr[5] is the total of all five pair differences. */
Chris@42 93 Cr[0] = Tt + Tw;
Chris@42 94 Tx = FNMS(KP250000000, Tw, Tt);
Chris@42 95 Cr[WS(csr, 5)] = T3 + Ti;
Chris@42 96 Tj = FNMS(KP250000000, Ti, T3);
Chris@42 97 }
/* Remaining real outputs: Cr[4]/Cr[2] from (Ty, Tx), Cr[3]/Cr[1] from (Tk, Tj). */
Chris@42 98 Cr[WS(csr, 4)] = FMA(KP559016994, Ty, Tx);
Chris@42 99 Cr[WS(csr, 2)] = FNMS(KP559016994, Ty, Tx);
Chris@42 100 Cr[WS(csr, 3)] = FNMS(KP559016994, Tk, Tj);
Chris@42 101 Cr[WS(csr, 1)] = FMA(KP559016994, Tk, Tj);
Chris@42 102 }
Chris@42 103 }
Chris@42 104 }
Chris@42 105 }
Chris@42 106
/*
 * Descriptor passed to the registration call for the HAVE_FMA kernel.
 * The values 24, 4 and 10 match the "24 additions, 4 multiplications,
 * 10 fused multiply/add" counts quoted in the generated comment for this
 * variant.  NOTE(review): the meaning of the leading 10, the trailing 0 and
 * GENUS is set by kr2c_desc, declared outside this file -- confirm there.
 */
Chris@42 107 static const kr2c_desc desc = { 10, "r2cf_10", {24, 4, 10, 0}, &GENUS };
Chris@42 108
/*
 * Registration entry point (HAVE_FMA build): passes planner p, the r2cf_10
 * kernel and its descriptor to X(kr2c_register).  X() is a macro defined
 * outside this file.
 */
Chris@42 109 void X(codelet_r2cf_10) (planner *p) {
Chris@42 110 X(kr2c_register) (p, r2cf_10, &desc);
Chris@42 111 }
Chris@42 112
Chris@42 113 #else /* HAVE_FMA */
Chris@42 114
Chris@42 115 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cf_10 -include r2cf.h */
Chris@42 116
Chris@42 117 /*
Chris@42 118 * This function contains 34 FP additions, 12 FP multiplications,
Chris@42 119 * (or, 28 additions, 6 multiplications, 6 fused multiply/add),
Chris@42 120 * 26 stack variables, 4 constants, and 20 memory accesses
Chris@42 121 */
Chris@42 122 #include "r2cf.h"
Chris@42 123
/*
 * r2cf_10 (variant compiled when HAVE_FMA is not defined): kernel emitted by
 * gen_r2cf with "-n 10".
 *
 * Each loop pass reads ten values -- R0[0], R0[WS(rs, 1..4)], R1[0] and
 * R1[WS(rs, 1..4)] -- and writes ten values: Cr[0], Cr[WS(csr, 1..5)] and
 * Ci[WS(csi, 1..4)].  The loop counter starts at v and the body runs while
 * it is > 0; after each pass R0 and R1 advance by ivs and Cr and Ci advance
 * by ovs.
 *
 * The inputs are consumed as five pairs (R0[k], R1[(k + 2) mod 5]); every
 * pair yields one difference and one sum.  Differences feed Cr[1], Cr[3],
 * Cr[5], Ci[1] and Ci[3]; sums feed Cr[0], Cr[2], Cr[4], Ci[2] and Ci[4].
 *
 * NOTE(review): the name and generator flags suggest a size-10
 * real-to-complex forward DFT with R0/R1 holding the even/odd-indexed
 * samples; that cannot be verified from this file alone -- confirm against
 * r2cf.h / codelet-rdft.h.  E, R, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file.  FMA(a, b, c) and
 * FNMS(a, b, c) are presumably a*b + c and c - a*b -- TODO confirm.
 */
Chris@42 124 static void r2cf_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 125 {
/* Constant values numerically match 1/4, sqrt(5)/4, sin(72 deg) and sin(36 deg). */
Chris@42 126 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 127 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 128 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 129 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 130 {
Chris@42 131 INT i;
Chris@42 132 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) {
Chris@42 133 E Ti, Tt, Ta, Tn, Td, To, Te, Tv, T3, Tq, T6, Tr, T7, Tu, Tg;
Chris@42 134 E Th;
/* Pair (R0[0], R1[2]): difference Ti, sum Tt. */
Chris@42 135 Tg = R0[0];
Chris@42 136 Th = R1[WS(rs, 2)];
Chris@42 137 Ti = Tg - Th;
Chris@42 138 Tt = Tg + Th;
Chris@42 139 {
Chris@42 140 E T8, T9, Tb, Tc;
/* Pairs (R0[2], R1[4]) -> Ta/Tn and (R0[3], R1[0]) -> Td/To (difference/sum). */
Chris@42 141 T8 = R0[WS(rs, 2)];
Chris@42 142 T9 = R1[WS(rs, 4)];
Chris@42 143 Ta = T8 - T9;
Chris@42 144 Tn = T8 + T9;
Chris@42 145 Tb = R0[WS(rs, 3)];
Chris@42 146 Tc = R1[0];
Chris@42 147 Td = Tb - Tc;
Chris@42 148 To = Tb + Tc;
Chris@42 149 }
Chris@42 150 Te = Ta + Td;
Chris@42 151 Tv = Tn + To;
Chris@42 152 {
Chris@42 153 E T1, T2, T4, T5;
/* Pairs (R0[1], R1[3]) -> T3/Tq and (R0[4], R1[1]) -> T6/Tr (difference/sum). */
Chris@42 154 T1 = R0[WS(rs, 1)];
Chris@42 155 T2 = R1[WS(rs, 3)];
Chris@42 156 T3 = T1 - T2;
Chris@42 157 Tq = T1 + T2;
Chris@42 158 T4 = R0[WS(rs, 4)];
Chris@42 159 T5 = R1[WS(rs, 1)];
Chris@42 160 T6 = T4 - T5;
Chris@42 161 Tr = T4 + T5;
Chris@42 162 }
Chris@42 163 T7 = T3 + T6;
Chris@42 164 Tu = Tq + Tr;
/* Outputs derived from the pair differences: Ci[1], Ci[3], Cr[1], Cr[5], Cr[3]. */
Chris@42 165 {
Chris@42 166 E Tl, Tm, Tf, Tj, Tk;
Chris@42 167 Tl = Td - Ta;
Chris@42 168 Tm = T3 - T6;
Chris@42 169 Ci[WS(csi, 1)] = FNMS(KP951056516, Tm, KP587785252 * Tl);
Chris@42 170 Ci[WS(csi, 3)] = FMA(KP587785252, Tm, KP951056516 * Tl);
Chris@42 171 Tf = KP559016994 * (T7 - Te);
Chris@42 172 Tj = T7 + Te;
Chris@42 173 Tk = FNMS(KP250000000, Tj, Ti);
Chris@42 174 Cr[WS(csr, 1)] = Tf + Tk;
Chris@42 175 Cr[WS(csr, 5)] = Ti + Tj;
Chris@42 176 Cr[WS(csr, 3)] = Tk - Tf;
Chris@42 177 }
/* Outputs derived from the pair sums: Ci[2], Ci[4], Cr[2], Cr[0], Cr[4]. */
Chris@42 178 {
Chris@42 179 E Tp, Ts, Ty, Tw, Tx;
Chris@42 180 Tp = Tn - To;
Chris@42 181 Ts = Tq - Tr;
Chris@42 182 Ci[WS(csi, 2)] = FNMS(KP587785252, Ts, KP951056516 * Tp);
Chris@42 183 Ci[WS(csi, 4)] = FMA(KP951056516, Ts, KP587785252 * Tp);
Chris@42 184 Ty = KP559016994 * (Tu - Tv);
Chris@42 185 Tw = Tu + Tv;
Chris@42 186 Tx = FNMS(KP250000000, Tw, Tt);
Chris@42 187 Cr[WS(csr, 2)] = Tx - Ty;
Chris@42 188 Cr[0] = Tt + Tw;
Chris@42 189 Cr[WS(csr, 4)] = Ty + Tx;
Chris@42 190 }
Chris@42 191 }
Chris@42 192 }
Chris@42 193 }
Chris@42 194
/*
 * Descriptor passed to the registration call for the non-FMA kernel.
 * The values 28, 6 and 6 match the "28 additions, 6 multiplications,
 * 6 fused multiply/add" counts quoted in the generated comment for this
 * variant.  NOTE(review): the meaning of the leading 10, the trailing 0 and
 * GENUS is set by kr2c_desc, declared outside this file -- confirm there.
 */
Chris@42 195 static const kr2c_desc desc = { 10, "r2cf_10", {28, 6, 6, 0}, &GENUS };
Chris@42 196
/*
 * Registration entry point (build without HAVE_FMA): passes planner p, the
 * r2cf_10 kernel and its descriptor to X(kr2c_register).  X() is a macro
 * defined outside this file.
 */
Chris@42 197 void X(codelet_r2cf_10) (planner *p) {
Chris@42 198 X(kr2c_register) (p, r2cf_10, &desc);
Chris@42 199 }
Chris@42 200
Chris@42 201 #endif /* HAVE_FMA */