annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cb_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:28 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cb_12 -include rdft/scalar/r2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 38 FP additions, 16 FP multiplications,
Chris@82 32 * (or, 22 additions, 0 multiplications, 16 fused multiply/add),
Chris@82 33 * 25 stack variables, 2 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cb.h"
Chris@82 36
/*
 * r2cb_12 (FMA variant): size-12 codelet emitted by gen_r2cb with
 * "-fma -sign 1 -n 12" (see the "Generated by" comment for this branch).
 *
 * Each loop iteration reads Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..5)]
 * (12 loads) and writes R0[0], R0[WS(rs, 1..5)], R1[0] and R1[WS(rs, 1..5)]
 * (12 stores).
 *
 * Parameters:
 *   R0, R1   - output arrays; both are advanced by ovs after each iteration
 *   Cr, Ci   - input arrays; both are advanced by ivs after each iteration
 *   rs       - stride handed to WS() when indexing R0 / R1
 *   csr, csi - strides handed to WS() when indexing Cr / Ci respectively
 *   v        - iteration count; the body runs v times (not at all if v <= 0)
 *   ivs, ovs - per-iteration pointer increments for the inputs / outputs
 *
 * NOTE(review): presumably Cr/Ci are the real/imaginary parts of a
 * halfcomplex input and R0/R1 the even/odd-indexed real outputs, following
 * FFTW's r2cb convention -- confirm against rdft/codelet-rdft.h.
 * NOTE(review): E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * outside this file; their exact semantics are not visible here.
 */
Chris@82 37 static void r2cb_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Constants: 1.7320508075... equals sqrt(3) to the printed precision; 2.0. */
Chris@82 39 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 40 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT i;
/* Count down from v, stepping every pointer by its vector stride. */
Chris@82 43 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) {
Chris@82 44 E T8, Tb, Tk, Tz, Tu, Tv, Tn, Ty, T3, Tp, Tf, T6, Tq, Ti;
/* Stage 1: load Cr and Ci at indices 3, 5, 1 and form sums/differences. */
Chris@82 45 {
Chris@82 46 E T9, Ta, Tl, Tm;
Chris@82 47 T8 = Cr[WS(csr, 3)];
Chris@82 48 T9 = Cr[WS(csr, 5)];
Chris@82 49 Ta = Cr[WS(csr, 1)];
Chris@82 50 Tb = T9 + Ta;
Chris@82 51 Tk = FNMS(KP2_000000000, T8, Tb);
Chris@82 52 Tz = T9 - Ta;
Chris@82 53 Tu = Ci[WS(csi, 3)];
Chris@82 54 Tl = Ci[WS(csi, 5)];
Chris@82 55 Tm = Ci[WS(csi, 1)];
Chris@82 56 Tv = Tl + Tm;
Chris@82 57 Tn = Tl - Tm;
Chris@82 58 Ty = FMA(KP2_000000000, Tu, Tv);
Chris@82 59 }
/* Stage 2: combine Cr[0], Cr[4] and Ci[4]. */
Chris@82 60 {
Chris@82 61 E Te, T1, T2, Td;
Chris@82 62 Te = Ci[WS(csi, 4)];
Chris@82 63 T1 = Cr[0];
Chris@82 64 T2 = Cr[WS(csr, 4)];
Chris@82 65 Td = T1 - T2;
Chris@82 66 T3 = FMA(KP2_000000000, T2, T1);
Chris@82 67 Tp = FNMS(KP1_732050807, Te, Td);
Chris@82 68 Tf = FMA(KP1_732050807, Te, Td);
Chris@82 69 }
/* Stage 3: combine Cr[6], Cr[2] and Ci[2]. */
Chris@82 70 {
Chris@82 71 E Th, T4, T5, Tg;
Chris@82 72 Th = Ci[WS(csi, 2)];
Chris@82 73 T4 = Cr[WS(csr, 6)];
Chris@82 74 T5 = Cr[WS(csr, 2)];
Chris@82 75 Tg = T4 - T5;
Chris@82 76 T6 = FMA(KP2_000000000, T5, T4);
Chris@82 77 Tq = FMA(KP1_732050807, Th, Tg);
Chris@82 78 Ti = FNMS(KP1_732050807, Th, Tg);
Chris@82 79 }
/* Stage 4: merge the intermediates and store all twelve outputs. */
Chris@82 80 {
Chris@82 81 E T7, Tc, Tx, TA;
Chris@82 82 T7 = T3 + T6;
Chris@82 83 Tc = T8 + Tb;
Chris@82 84 R0[WS(rs, 3)] = FNMS(KP2_000000000, Tc, T7);
Chris@82 85 R0[0] = FMA(KP2_000000000, Tc, T7);
Chris@82 86 {
Chris@82 87 E Tj, To, TB, TC;
Chris@82 88 Tj = Tf + Ti;
Chris@82 89 To = FMA(KP1_732050807, Tn, Tk);
Chris@82 90 R0[WS(rs, 1)] = Tj + To;
Chris@82 91 R0[WS(rs, 4)] = Tj - To;
Chris@82 92 TB = Tf - Ti;
Chris@82 93 TC = FNMS(KP1_732050807, Tz, Ty);
Chris@82 94 R1[WS(rs, 2)] = TB - TC;
Chris@82 95 R1[WS(rs, 5)] = TB + TC;
Chris@82 96 }
Chris@82 97 Tx = Tp - Tq;
Chris@82 98 TA = FMA(KP1_732050807, Tz, Ty);
Chris@82 99 R1[0] = Tx - TA;
Chris@82 100 R1[WS(rs, 3)] = Tx + TA;
Chris@82 101 {
Chris@82 102 E Tt, Tw, Tr, Ts;
Chris@82 103 Tt = T3 - T6;
Chris@82 104 Tw = Tu - Tv;
Chris@82 105 R1[WS(rs, 4)] = FNMS(KP2_000000000, Tw, Tt);
Chris@82 106 R1[WS(rs, 1)] = FMA(KP2_000000000, Tw, Tt);
Chris@82 107 Tr = Tp + Tq;
Chris@82 108 Ts = FNMS(KP1_732050807, Tn, Tk);
Chris@82 109 R0[WS(rs, 5)] = Tr + Ts;
Chris@82 110 R0[WS(rs, 2)] = Tr - Ts;
Chris@82 111 }
Chris@82 112 }
Chris@82 113 }
Chris@82 114 }
Chris@82 115 }
Chris@82 116
/*
 * Descriptor handed to kr2c_register for the FMA variant: 12, the codelet
 * name, and the counts {22, 0, 16, 0}. The first three counts match the
 * "22 additions, 0 multiplications, 16 fused multiply/add" figures quoted in
 * the generated comment for this branch.
 * NOTE(review): the meaning of the fourth count and of GENUS is defined
 * outside this file -- confirm against the kr2c_desc declaration.
 */
Chris@82 117 static const kr2c_desc desc = { 12, "r2cb_12", {22, 0, 16, 0}, &GENUS };
Chris@82 118
/*
 * Registration entry point (FMA variant): passes the r2cb_12 function and its
 * descriptor to kr2c_register for planner p.
 * NOTE(review): X() is a macro defined outside this file; presumably it
 * applies the library's name prefix -- confirm.
 */
Chris@82 119 void X(codelet_r2cb_12) (planner *p) {
Chris@82 120 X(kr2c_register) (p, r2cb_12, &desc);
Chris@82 121 }
Chris@82 122
Chris@82 123 #else
Chris@82 124
Chris@82 125 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cb_12 -include rdft/scalar/r2cb.h */
Chris@82 126
Chris@82 127 /*
Chris@82 128 * This function contains 38 FP additions, 10 FP multiplications,
Chris@82 129 * (or, 34 additions, 6 multiplications, 4 fused multiply/add),
Chris@82 130 * 25 stack variables, 2 constants, and 24 memory accesses
Chris@82 131 */
Chris@82 132 #include "rdft/scalar/r2cb.h"
Chris@82 133
/*
 * r2cb_12 (non-FMA variant): size-12 codelet emitted by gen_r2cb with
 * "-sign 1 -n 12" and without "-fma" (see the "Generated by" comment for
 * this branch). Multiplications by the constants are mostly written as plain
 * products; FMA/FMS are still used for a few expressions.
 *
 * Each loop iteration reads Cr[0], Cr[WS(csr, 1..6)] and Ci[WS(csi, 1..5)]
 * (12 loads) and writes R0[0], R0[WS(rs, 1..5)], R1[0] and R1[WS(rs, 1..5)]
 * (12 stores).
 *
 * Parameters:
 *   R0, R1   - output arrays; both are advanced by ovs after each iteration
 *   Cr, Ci   - input arrays; both are advanced by ivs after each iteration
 *   rs       - stride handed to WS() when indexing R0 / R1
 *   csr, csi - strides handed to WS() when indexing Cr / Ci respectively
 *   v        - iteration count; the body runs v times (not at all if v <= 0)
 *   ivs, ovs - per-iteration pointer increments for the inputs / outputs
 *
 * NOTE(review): presumably Cr/Ci are the real/imaginary parts of a
 * halfcomplex input and R0/R1 the even/odd-indexed real outputs, following
 * FFTW's r2cb convention -- confirm against rdft/codelet-rdft.h.
 * NOTE(review): E, DK, WS, FMA, FMS and MAKE_VOLATILE_STRIDE are defined
 * outside this file; their exact semantics are not visible here.
 */
Chris@82 134 static void r2cb_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 135 {
/* Constants: 1.7320508075... equals sqrt(3) to the printed precision; 2.0. */
Chris@82 136 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 137 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 138 {
Chris@82 139 INT i;
/* Count down from v, stepping every pointer by its vector stride. */
Chris@82 140 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) {
Chris@82 141 E T8, Tb, Tm, TA, Tw, Tx, Tp, TB, T3, Tr, Tg, T6, Ts, Tk;
/* Stage 1: load Cr and Ci at indices 3, 5, 1 and form sums/differences. */
Chris@82 142 {
Chris@82 143 E T9, Ta, Tn, To;
Chris@82 144 T8 = Cr[WS(csr, 3)];
Chris@82 145 T9 = Cr[WS(csr, 5)];
Chris@82 146 Ta = Cr[WS(csr, 1)];
Chris@82 147 Tb = T9 + Ta;
Chris@82 148 Tm = FMS(KP2_000000000, T8, Tb);
Chris@82 149 TA = KP1_732050807 * (T9 - Ta);
Chris@82 150 Tw = Ci[WS(csi, 3)];
Chris@82 151 Tn = Ci[WS(csi, 5)];
Chris@82 152 To = Ci[WS(csi, 1)];
Chris@82 153 Tx = Tn + To;
Chris@82 154 Tp = KP1_732050807 * (Tn - To);
Chris@82 155 TB = FMA(KP2_000000000, Tw, Tx);
Chris@82 156 }
/* Stage 2: combine Cr[0], Cr[4] and the scaled Ci[4]. */
Chris@82 157 {
Chris@82 158 E Tf, T1, T2, Td, Te;
Chris@82 159 Te = Ci[WS(csi, 4)];
Chris@82 160 Tf = KP1_732050807 * Te;
Chris@82 161 T1 = Cr[0];
Chris@82 162 T2 = Cr[WS(csr, 4)];
Chris@82 163 Td = T1 - T2;
Chris@82 164 T3 = FMA(KP2_000000000, T2, T1);
Chris@82 165 Tr = Td - Tf;
Chris@82 166 Tg = Td + Tf;
Chris@82 167 }
/* Stage 3: combine Cr[6], Cr[2] and the scaled Ci[2]. */
Chris@82 168 {
Chris@82 169 E Tj, T4, T5, Th, Ti;
Chris@82 170 Ti = Ci[WS(csi, 2)];
Chris@82 171 Tj = KP1_732050807 * Ti;
Chris@82 172 T4 = Cr[WS(csr, 6)];
Chris@82 173 T5 = Cr[WS(csr, 2)];
Chris@82 174 Th = T4 - T5;
Chris@82 175 T6 = FMA(KP2_000000000, T5, T4);
Chris@82 176 Ts = Th + Tj;
Chris@82 177 Tk = Th - Tj;
Chris@82 178 }
/* Stage 4: merge the intermediates and store all twelve outputs. */
Chris@82 179 {
Chris@82 180 E T7, Tc, Tz, TC;
Chris@82 181 T7 = T3 + T6;
Chris@82 182 Tc = KP2_000000000 * (T8 + Tb);
Chris@82 183 R0[WS(rs, 3)] = T7 - Tc;
Chris@82 184 R0[0] = T7 + Tc;
Chris@82 185 {
Chris@82 186 E Tl, Tq, TD, TE;
Chris@82 187 Tl = Tg + Tk;
Chris@82 188 Tq = Tm - Tp;
Chris@82 189 R0[WS(rs, 1)] = Tl - Tq;
Chris@82 190 R0[WS(rs, 4)] = Tl + Tq;
Chris@82 191 TD = Tg - Tk;
Chris@82 192 TE = TB - TA;
Chris@82 193 R1[WS(rs, 2)] = TD - TE;
Chris@82 194 R1[WS(rs, 5)] = TD + TE;
Chris@82 195 }
Chris@82 196 Tz = Tr - Ts;
Chris@82 197 TC = TA + TB;
Chris@82 198 R1[0] = Tz - TC;
Chris@82 199 R1[WS(rs, 3)] = Tz + TC;
Chris@82 200 {
Chris@82 201 E Tv, Ty, Tt, Tu;
Chris@82 202 Tv = T3 - T6;
Chris@82 203 Ty = KP2_000000000 * (Tw - Tx);
Chris@82 204 R1[WS(rs, 4)] = Tv - Ty;
Chris@82 205 R1[WS(rs, 1)] = Tv + Ty;
Chris@82 206 Tt = Tr + Ts;
Chris@82 207 Tu = Tm + Tp;
Chris@82 208 R0[WS(rs, 5)] = Tt - Tu;
Chris@82 209 R0[WS(rs, 2)] = Tt + Tu;
Chris@82 210 }
Chris@82 211 }
Chris@82 212 }
Chris@82 213 }
Chris@82 214 }
Chris@82 215
/*
 * Descriptor handed to kr2c_register for the non-FMA variant: 12, the codelet
 * name, and the counts {34, 6, 4, 0}. The first three counts match the
 * "34 additions, 6 multiplications, 4 fused multiply/add" figures quoted in
 * the generated comment for this branch.
 * NOTE(review): the meaning of the fourth count and of GENUS is defined
 * outside this file -- confirm against the kr2c_desc declaration.
 */
Chris@82 216 static const kr2c_desc desc = { 12, "r2cb_12", {34, 6, 4, 0}, &GENUS };
Chris@82 217
/*
 * Registration entry point (non-FMA variant): passes the r2cb_12 function and
 * its descriptor to kr2c_register for planner p.
 * NOTE(review): X() is a macro defined outside this file; presumably it
 * applies the library's name prefix -- confirm.
 */
Chris@82 218 void X(codelet_r2cb_12) (planner *p) {
Chris@82 219 X(kr2c_register) (p, r2cb_12, &desc);
Chris@82 220 }
Chris@82 221
Chris@82 222 #endif