annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cb_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:28 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -name r2cb_10 -include rdft/scalar/r2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 34 FP additions, 20 FP multiplications,
Chris@82 32 * (or, 14 additions, 0 multiplications, 20 fused multiply/add),
Chris@82 33 * 26 stack variables, 5 constants, and 20 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cb.h"
Chris@82 36
/*
 * r2cb_10 (FMA variant): straight-line kernel that, per pass, reads six
 * entries of Cr (offset 0 and WS(csr, 1..5)) and four entries of Ci
 * (WS(csi, 1..4)), and writes five entries each of R0 and R1 (offset 0 and
 * WS(rs, 1..4)) -- 20 memory accesses, as stated in the generator comment.
 *
 * The generator command line (-n 10 -sign 1) and the name suggest a size-10
 * backward real-data transform. NOTE(review): R0/R1 presumably receive the
 * even-/odd-indexed real outputs -- confirm against rdft/codelet-rdft.h.
 *
 * The body runs v times; after each pass R0 and R1 advance by ovs and
 * Cr and Ci advance by ivs.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * project headers not visible here. NOTE(review): the arithmetic below reads
 * as FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm.
 */
Chris@82 37 static void r2cb_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants: 1.9021... is about 2*sin(2*pi/5), 1.1180... is about
 * sqrt(5)/2, and 0.6180... is about (sqrt(5) - 1)/2. */
Chris@82 39 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@82 40 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@82 41 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 44 {
Chris@82 45 INT i;
/* One pass per count in v; pointer advances happen in the loop increment. */
Chris@82 46 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) {
Chris@82 47 E T3, Tb, Tn, Tu, Tk, Tv, Ta, Ts, Te, Tg, Ti, Tj;
Chris@82 48 {
Chris@82 49 E T1, T2, Tl, Tm;
/* Difference/sum of the Cr entries at offsets 0 and 5. */
Chris@82 50 T1 = Cr[0];
Chris@82 51 T2 = Cr[WS(csr, 5)];
Chris@82 52 T3 = T1 - T2;
Chris@82 53 Tb = T1 + T2;
/* Difference/sum of the Ci entries at offsets 2 and 3. */
Chris@82 54 Tl = Ci[WS(csi, 2)];
Chris@82 55 Tm = Ci[WS(csi, 3)];
Chris@82 56 Tn = Tl - Tm;
Chris@82 57 Tu = Tl + Tm;
Chris@82 58 }
/* Difference/sum of the Ci entries at offsets 4 and 1. */
Chris@82 59 Ti = Ci[WS(csi, 4)];
Chris@82 60 Tj = Ci[WS(csi, 1)];
Chris@82 61 Tk = Ti - Tj;
Chris@82 62 Tv = Ti + Tj;
Chris@82 63 {
Chris@82 64 E T6, Tc, T9, Td;
Chris@82 65 {
Chris@82 66 E T4, T5, T7, T8;
/* Pair Cr offsets (2, 3) and (4, 1) into differences and sums. */
Chris@82 67 T4 = Cr[WS(csr, 2)];
Chris@82 68 T5 = Cr[WS(csr, 3)];
Chris@82 69 T6 = T4 - T5;
Chris@82 70 Tc = T4 + T5;
Chris@82 71 T7 = Cr[WS(csr, 4)];
Chris@82 72 T8 = Cr[WS(csr, 1)];
Chris@82 73 T9 = T7 - T8;
Chris@82 74 Td = T7 + T8;
Chris@82 75 }
/* Ta/Ts combine the pairwise differences; Te/Tg combine the pairwise sums. */
Chris@82 76 Ta = T6 + T9;
Chris@82 77 Ts = T6 - T9;
Chris@82 78 Te = Tc + Td;
Chris@82 79 Tg = Tc - Td;
Chris@82 80 }
/* Two outputs that use only Cr: T3 + 2*Ta and Tb + 2*Te
 * (assuming FMA(a, b, c) = a*b + c). */
Chris@82 81 R1[WS(rs, 2)] = FMA(KP2_000000000, Ta, T3);
Chris@82 82 R0[0] = FMA(KP2_000000000, Te, Tb);
Chris@82 83 {
Chris@82 84 E To, Tq, Th, Tp, Tf;
/* Remaining four R0 outputs: built from the Cr sums (Tb, Te, Tg) and the
 * Ci differences (Tk, Tn). */
Chris@82 85 To = FNMS(KP618033988, Tn, Tk);
Chris@82 86 Tq = FMA(KP618033988, Tk, Tn);
Chris@82 87 Tf = FNMS(KP500000000, Te, Tb);
Chris@82 88 Th = FNMS(KP1_118033988, Tg, Tf);
Chris@82 89 Tp = FMA(KP1_118033988, Tg, Tf);
Chris@82 90 R0[WS(rs, 4)] = FNMS(KP1_902113032, To, Th);
Chris@82 91 R0[WS(rs, 2)] = FMA(KP1_902113032, Tq, Tp);
Chris@82 92 R0[WS(rs, 1)] = FMA(KP1_902113032, To, Th);
Chris@82 93 R0[WS(rs, 3)] = FNMS(KP1_902113032, Tq, Tp);
Chris@82 94 }
Chris@82 95 {
Chris@82 96 E Tw, Ty, Tt, Tx, Tr;
/* Remaining four R1 outputs: built from the Cr differences (T3, Ta, Ts)
 * and the Ci sums (Tu, Tv). */
Chris@82 97 Tw = FMA(KP618033988, Tv, Tu);
Chris@82 98 Ty = FNMS(KP618033988, Tu, Tv);
Chris@82 99 Tr = FNMS(KP500000000, Ta, T3);
Chris@82 100 Tt = FMA(KP1_118033988, Ts, Tr);
Chris@82 101 Tx = FNMS(KP1_118033988, Ts, Tr);
Chris@82 102 R1[0] = FNMS(KP1_902113032, Tw, Tt);
Chris@82 103 R1[WS(rs, 3)] = FMA(KP1_902113032, Ty, Tx);
Chris@82 104 R1[WS(rs, 4)] = FMA(KP1_902113032, Tw, Tt);
Chris@82 105 R1[WS(rs, 1)] = FNMS(KP1_902113032, Ty, Tx);
Chris@82 106 }
Chris@82 107 }
Chris@82 108 }
Chris@82 109 }
Chris@82 110
/*
 * Descriptor handed to X(kr2c_register) together with r2cb_10. The first
 * three entries of {14, 0, 20, 0} match the addition / multiplication /
 * fused multiply-add counts quoted in the generator comment for this
 * variant. NOTE(review): the meaning of the leading 10 (presumably the
 * transform size), of the fourth count, and of GENUS is defined in project
 * headers not visible here -- confirm.
 */
Chris@82 111 static const kr2c_desc desc = { 10, "r2cb_10", {14, 0, 20, 0}, &GENUS };
Chris@82 112
/*
 * Entry point for this codelet: passes the r2cb_10 kernel and its descriptor
 * to X(kr2c_register) for planner p. X() is a project macro not visible
 * here (presumably a symbol-name wrapper -- confirm).
 */
Chris@82 113 void X(codelet_r2cb_10) (planner *p) {
Chris@82 114 X(kr2c_register) (p, r2cb_10, &desc);
Chris@82 115 }
Chris@82 116
Chris@82 117 #else
Chris@82 118
Chris@82 119 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -name r2cb_10 -include rdft/scalar/r2cb.h */
Chris@82 120
Chris@82 121 /*
Chris@82 122 * This function contains 34 FP additions, 14 FP multiplications,
Chris@82 123 * (or, 26 additions, 6 multiplications, 8 fused multiply/add),
Chris@82 124 * 26 stack variables, 5 constants, and 20 memory accesses
Chris@82 125 */
Chris@82 126 #include "rdft/scalar/r2cb.h"
Chris@82 127
/*
 * r2cb_10 (non-FMA variant, compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): straight-line kernel that, per
 * pass, reads six entries of Cr (offset 0 and WS(csr, 1..5)) and four
 * entries of Ci (WS(csi, 1..4)), and writes five entries each of R0 and R1
 * (offset 0 and WS(rs, 1..4)).
 *
 * The generator command line (-n 10 -sign 1) and the name suggest a size-10
 * backward real-data transform. NOTE(review): R0/R1 presumably receive the
 * even-/odd-indexed real outputs -- confirm against rdft/codelet-rdft.h.
 *
 * The body runs v times; after each pass R0 and R1 advance by ovs and
 * Cr and Ci advance by ivs.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * project headers not visible here. NOTE(review): the arithmetic below reads
 * as FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm.
 */
Chris@82 128 static void r2cb_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 129 {
/* Numeric constants: 1.9021... is about 2*sin(2*pi/5), 1.1755... is about
 * 2*sin(pi/5), and 1.1180... is about sqrt(5)/2. */
Chris@82 130 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 131 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@82 132 DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
Chris@82 133 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 134 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@82 135 {
Chris@82 136 INT i;
/* One pass per count in v; pointer advances happen in the loop increment. */
Chris@82 137 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(40, rs), MAKE_VOLATILE_STRIDE(40, csr), MAKE_VOLATILE_STRIDE(40, csi)) {
Chris@82 138 E T3, Tb, Tn, Tv, Tk, Tu, Ta, Ts, Te, Tg, Ti, Tj;
Chris@82 139 {
Chris@82 140 E T1, T2, Tl, Tm;
/* Difference/sum of the Cr entries at offsets 0 and 5. */
Chris@82 141 T1 = Cr[0];
Chris@82 142 T2 = Cr[WS(csr, 5)];
Chris@82 143 T3 = T1 - T2;
Chris@82 144 Tb = T1 + T2;
/* Difference/sum of the Ci entries at offsets 4 and 1. */
Chris@82 145 Tl = Ci[WS(csi, 4)];
Chris@82 146 Tm = Ci[WS(csi, 1)];
Chris@82 147 Tn = Tl - Tm;
Chris@82 148 Tv = Tl + Tm;
Chris@82 149 }
/* Difference/sum of the Ci entries at offsets 2 and 3. */
Chris@82 150 Ti = Ci[WS(csi, 2)];
Chris@82 151 Tj = Ci[WS(csi, 3)];
Chris@82 152 Tk = Ti - Tj;
Chris@82 153 Tu = Ti + Tj;
Chris@82 154 {
Chris@82 155 E T6, Tc, T9, Td;
Chris@82 156 {
Chris@82 157 E T4, T5, T7, T8;
/* Pair Cr offsets (2, 3) and (4, 1) into differences and sums. */
Chris@82 158 T4 = Cr[WS(csr, 2)];
Chris@82 159 T5 = Cr[WS(csr, 3)];
Chris@82 160 T6 = T4 - T5;
Chris@82 161 Tc = T4 + T5;
Chris@82 162 T7 = Cr[WS(csr, 4)];
Chris@82 163 T8 = Cr[WS(csr, 1)];
Chris@82 164 T9 = T7 - T8;
Chris@82 165 Td = T7 + T8;
Chris@82 166 }
/* Ts and Tg are stored already multiplied by KP1_118033988, so the later
 * combinations with Tr and Tf are plain additions/subtractions. */
Chris@82 167 Ta = T6 + T9;
Chris@82 168 Ts = KP1_118033988 * (T6 - T9);
Chris@82 169 Te = Tc + Td;
Chris@82 170 Tg = KP1_118033988 * (Tc - Td);
Chris@82 171 }
/* Two outputs that use only Cr: T3 + 2*Ta and Tb + 2*Te
 * (assuming FMA(a, b, c) = a*b + c). */
Chris@82 172 R1[WS(rs, 2)] = FMA(KP2_000000000, Ta, T3);
Chris@82 173 R0[0] = FMA(KP2_000000000, Te, Tb);
Chris@82 174 {
Chris@82 175 E To, Tq, Th, Tp, Tf;
/* Remaining four R0 outputs: built from the Cr sums (Tb, Te, Tg) and the
 * Ci differences (Tk, Tn). */
Chris@82 176 To = FNMS(KP1_902113032, Tn, KP1_175570504 * Tk);
Chris@82 177 Tq = FMA(KP1_902113032, Tk, KP1_175570504 * Tn);
Chris@82 178 Tf = FNMS(KP500000000, Te, Tb);
Chris@82 179 Th = Tf - Tg;
Chris@82 180 Tp = Tg + Tf;
Chris@82 181 R0[WS(rs, 1)] = Th - To;
Chris@82 182 R0[WS(rs, 2)] = Tp + Tq;
Chris@82 183 R0[WS(rs, 4)] = Th + To;
Chris@82 184 R0[WS(rs, 3)] = Tp - Tq;
Chris@82 185 }
Chris@82 186 {
Chris@82 187 E Tw, Ty, Tt, Tx, Tr;
/* Remaining four R1 outputs: built from the Cr differences (T3, Ta, Ts)
 * and the Ci sums (Tu, Tv). */
Chris@82 188 Tw = FNMS(KP1_902113032, Tv, KP1_175570504 * Tu);
Chris@82 189 Ty = FMA(KP1_902113032, Tu, KP1_175570504 * Tv);
Chris@82 190 Tr = FNMS(KP500000000, Ta, T3);
Chris@82 191 Tt = Tr - Ts;
Chris@82 192 Tx = Ts + Tr;
Chris@82 193 R1[WS(rs, 3)] = Tt - Tw;
Chris@82 194 R1[WS(rs, 4)] = Tx + Ty;
Chris@82 195 R1[WS(rs, 1)] = Tt + Tw;
Chris@82 196 R1[0] = Tx - Ty;
Chris@82 197 }
Chris@82 198 }
Chris@82 199 }
Chris@82 200 }
Chris@82 201
/*
 * Descriptor handed to X(kr2c_register) together with r2cb_10. The first
 * three entries of {26, 6, 8, 0} match the addition / multiplication /
 * fused multiply-add counts quoted in the generator comment for this
 * variant. NOTE(review): the meaning of the leading 10 (presumably the
 * transform size), of the fourth count, and of GENUS is defined in project
 * headers not visible here -- confirm.
 */
Chris@82 202 static const kr2c_desc desc = { 10, "r2cb_10", {26, 6, 8, 0}, &GENUS };
Chris@82 203
/*
 * Entry point for this codelet: passes the r2cb_10 kernel and its descriptor
 * to X(kr2c_register) for planner p. X() is a project macro not visible
 * here (presumably a symbol-name wrapper -- confirm).
 */
Chris@82 204 void X(codelet_r2cb_10) (planner *p) {
Chris@82 205 X(kr2c_register) (p, r2cb_10, &desc);
Chris@82 206 }
Chris@82 207
Chris@82 208 #endif