annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:26 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cf_10 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 34 FP additions, 14 FP multiplications,
Chris@82 32 * (or, 24 additions, 4 multiplications, 10 fused multiply/add),
Chris@82 33 * 26 stack variables, 4 constants, and 20 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
Chris@82 37 static void r2cf_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 40 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 42 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 43 {
Chris@82 44 INT i;
Chris@82 45 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) {
Chris@82 46 E T3, Tt, Td, Tn, Tg, To, Th, Tv, T6, Tq, T9, Tr, Ta, Tu, T1;
Chris@82 47 E T2;
Chris@82 48 T1 = R0[0];
Chris@82 49 T2 = R1[WS(rs, 2)];
Chris@82 50 T3 = T1 - T2;
Chris@82 51 Tt = T1 + T2;
Chris@82 52 {
Chris@82 53 E Tb, Tc, Te, Tf;
Chris@82 54 Tb = R0[WS(rs, 2)];
Chris@82 55 Tc = R1[WS(rs, 4)];
Chris@82 56 Td = Tb - Tc;
Chris@82 57 Tn = Tb + Tc;
Chris@82 58 Te = R0[WS(rs, 3)];
Chris@82 59 Tf = R1[0];
Chris@82 60 Tg = Te - Tf;
Chris@82 61 To = Te + Tf;
Chris@82 62 }
Chris@82 63 Th = Td + Tg;
Chris@82 64 Tv = Tn + To;
Chris@82 65 {
Chris@82 66 E T4, T5, T7, T8;
Chris@82 67 T4 = R0[WS(rs, 1)];
Chris@82 68 T5 = R1[WS(rs, 3)];
Chris@82 69 T6 = T4 - T5;
Chris@82 70 Tq = T4 + T5;
Chris@82 71 T7 = R0[WS(rs, 4)];
Chris@82 72 T8 = R1[WS(rs, 1)];
Chris@82 73 T9 = T7 - T8;
Chris@82 74 Tr = T7 + T8;
Chris@82 75 }
Chris@82 76 Ta = T6 + T9;
Chris@82 77 Tu = Tq + Tr;
Chris@82 78 {
Chris@82 79 E Tl, Tm, Tk, Ti, Tj;
Chris@82 80 Tl = T6 - T9;
Chris@82 81 Tm = Tg - Td;
Chris@82 82 Ci[WS(csi, 1)] = -(KP951056516 * (FNMS(KP618033988, Tm, Tl)));
Chris@82 83 Ci[WS(csi, 3)] = KP951056516 * (FMA(KP618033988, Tl, Tm));
Chris@82 84 Tk = Ta - Th;
Chris@82 85 Ti = Ta + Th;
Chris@82 86 Tj = FNMS(KP250000000, Ti, T3);
Chris@82 87 Cr[WS(csr, 1)] = FMA(KP559016994, Tk, Tj);
Chris@82 88 Cr[WS(csr, 5)] = T3 + Ti;
Chris@82 89 Cr[WS(csr, 3)] = FNMS(KP559016994, Tk, Tj);
Chris@82 90 }
Chris@82 91 {
Chris@82 92 E Tp, Ts, Ty, Tw, Tx;
Chris@82 93 Tp = Tn - To;
Chris@82 94 Ts = Tq - Tr;
Chris@82 95 Ci[WS(csi, 2)] = KP951056516 * (FNMS(KP618033988, Ts, Tp));
Chris@82 96 Ci[WS(csi, 4)] = KP951056516 * (FMA(KP618033988, Tp, Ts));
Chris@82 97 Ty = Tu - Tv;
Chris@82 98 Tw = Tu + Tv;
Chris@82 99 Tx = FNMS(KP250000000, Tw, Tt);
Chris@82 100 Cr[WS(csr, 2)] = FNMS(KP559016994, Ty, Tx);
Chris@82 101 Cr[0] = Tt + Tw;
Chris@82 102 Cr[WS(csr, 4)] = FMA(KP559016994, Ty, Tx);
Chris@82 103 }
Chris@82 104 }
Chris@82 105 }
Chris@82 106 }
Chris@82 107
Chris@82 108 static const kr2c_desc desc = { 10, "r2cf_10", {24, 4, 10, 0}, &GENUS };
Chris@82 109
Chris@82 110 void X(codelet_r2cf_10) (planner *p) {
Chris@82 111 X(kr2c_register) (p, r2cf_10, &desc);
Chris@82 112 }
Chris@82 113
Chris@82 114 #else
Chris@82 115
Chris@82 116 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 10 -name r2cf_10 -include rdft/scalar/r2cf.h */
Chris@82 117
Chris@82 118 /*
Chris@82 119 * This function contains 34 FP additions, 12 FP multiplications,
Chris@82 120 * (or, 28 additions, 6 multiplications, 6 fused multiply/add),
Chris@82 121 * 26 stack variables, 4 constants, and 20 memory accesses
Chris@82 122 */
Chris@82 123 #include "rdft/scalar/r2cf.h"
Chris@82 124
Chris@82 125 static void r2cf_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 126 {
Chris@82 127 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 128 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 129 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 130 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 131 {
Chris@82 132 INT i;
Chris@82 133 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) {
Chris@82 134 E Ti, Tt, Ta, Tn, Td, To, Te, Tv, T3, Tq, T6, Tr, T7, Tu, Tg;
Chris@82 135 E Th;
Chris@82 136 Tg = R0[0];
Chris@82 137 Th = R1[WS(rs, 2)];
Chris@82 138 Ti = Tg - Th;
Chris@82 139 Tt = Tg + Th;
Chris@82 140 {
Chris@82 141 E T8, T9, Tb, Tc;
Chris@82 142 T8 = R0[WS(rs, 2)];
Chris@82 143 T9 = R1[WS(rs, 4)];
Chris@82 144 Ta = T8 - T9;
Chris@82 145 Tn = T8 + T9;
Chris@82 146 Tb = R0[WS(rs, 3)];
Chris@82 147 Tc = R1[0];
Chris@82 148 Td = Tb - Tc;
Chris@82 149 To = Tb + Tc;
Chris@82 150 }
Chris@82 151 Te = Ta + Td;
Chris@82 152 Tv = Tn + To;
Chris@82 153 {
Chris@82 154 E T1, T2, T4, T5;
Chris@82 155 T1 = R0[WS(rs, 1)];
Chris@82 156 T2 = R1[WS(rs, 3)];
Chris@82 157 T3 = T1 - T2;
Chris@82 158 Tq = T1 + T2;
Chris@82 159 T4 = R0[WS(rs, 4)];
Chris@82 160 T5 = R1[WS(rs, 1)];
Chris@82 161 T6 = T4 - T5;
Chris@82 162 Tr = T4 + T5;
Chris@82 163 }
Chris@82 164 T7 = T3 + T6;
Chris@82 165 Tu = Tq + Tr;
Chris@82 166 {
Chris@82 167 E Tl, Tm, Tf, Tj, Tk;
Chris@82 168 Tl = Td - Ta;
Chris@82 169 Tm = T3 - T6;
Chris@82 170 Ci[WS(csi, 1)] = FNMS(KP951056516, Tm, KP587785252 * Tl);
Chris@82 171 Ci[WS(csi, 3)] = FMA(KP587785252, Tm, KP951056516 * Tl);
Chris@82 172 Tf = KP559016994 * (T7 - Te);
Chris@82 173 Tj = T7 + Te;
Chris@82 174 Tk = FNMS(KP250000000, Tj, Ti);
Chris@82 175 Cr[WS(csr, 1)] = Tf + Tk;
Chris@82 176 Cr[WS(csr, 5)] = Ti + Tj;
Chris@82 177 Cr[WS(csr, 3)] = Tk - Tf;
Chris@82 178 }
Chris@82 179 {
Chris@82 180 E Tp, Ts, Ty, Tw, Tx;
Chris@82 181 Tp = Tn - To;
Chris@82 182 Ts = Tq - Tr;
Chris@82 183 Ci[WS(csi, 2)] = FNMS(KP587785252, Ts, KP951056516 * Tp);
Chris@82 184 Ci[WS(csi, 4)] = FMA(KP951056516, Ts, KP587785252 * Tp);
Chris@82 185 Ty = KP559016994 * (Tu - Tv);
Chris@82 186 Tw = Tu + Tv;
Chris@82 187 Tx = FNMS(KP250000000, Tw, Tt);
Chris@82 188 Cr[WS(csr, 2)] = Tx - Ty;
Chris@82 189 Cr[0] = Tt + Tw;
Chris@82 190 Cr[WS(csr, 4)] = Ty + Tx;
Chris@82 191 }
Chris@82 192 }
Chris@82 193 }
Chris@82 194 }
Chris@82 195
Chris@82 196 static const kr2c_desc desc = { 10, "r2cf_10", {28, 6, 6, 0}, &GENUS };
Chris@82 197
Chris@82 198 void X(codelet_r2cf_10) (planner *p) {
Chris@82 199 X(kr2c_register) (p, r2cf_10, &desc);
Chris@82 200 }
Chris@82 201
Chris@82 202 #endif