annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:04 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cf_12 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 38 FP additions, 10 FP multiplications,
Chris@42 32 * (or, 30 additions, 2 multiplications, 8 fused multiply/add),
Chris@42 33 * 31 stack variables, 2 constants, and 24 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
/*
 * r2cf_12 (HAVE_FMA variant): straight-line kernel emitted by genfft's
 * gen_r2cf with "-fma ... -n 12" (see the generator command line above).
 * NOTE(review): the name suggests a size-12 real-to-complex forward
 * transform; confirm against the FFTW codelet documentation.
 *
 * Per loop iteration the body reads R0[WS(rs, k)] and R1[WS(rs, k)] for
 * k = 0..5, and stores Cr[WS(csr, k)] for k = 0..6 and Ci[WS(csi, k)] for
 * k = 1..5.  Ci[0] and Ci[WS(csi, 6)] are never written here.
 *
 * R0, R1    input pointers; each is advanced by ivs after every iteration
 * Cr, Ci    output pointers; each is advanced by ovs after every iteration
 * rs        stride handed to WS() when indexing R0 and R1
 * csr, csi  strides handed to WS() when indexing Cr and Ci respectively
 * v         iteration count; the body runs only while the counter is > 0
 * ivs, ovs  per-iteration pointer increments for the inputs / outputs
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * project headers that are not visible in this listing.
 */
Chris@42 37 static void r2cf_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Two constants declared through DK(): numerically sqrt(3)/2 and 1/2. */
Chris@42 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 41 {
Chris@42 42 INT i;
/* Counts i down from v; the increment clause also steps all four data
   pointers and invokes MAKE_VOLATILE_STRIDE on each stride (that macro's
   effect is defined elsewhere). */
Chris@42 43 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) {
/* Intermediates that outlive the load block below and feed the outputs. */
Chris@42 44 E Tm, T6, Ty, Tp, T5, Tk, Tt, Tb, Tc, Td, T9, Tn;
Chris@42 45 {
Chris@42 46 E T1, Tg, Th, Ti, T4, T2, T3, T7, T8, Tj;
/* All twelve input loads happen in this block, interleaved with the first
   sums and differences (the generator was run with -schedule-for-pipeline).
   NOTE(review): FNMS(a, b, c) presumably evaluates c - a * b, which is
   consistent with the non-FMA variant of this file; confirm in the headers. */
Chris@42 47 T1 = R0[0];
Chris@42 48 T2 = R0[WS(rs, 2)];
Chris@42 49 T3 = R0[WS(rs, 4)];
Chris@42 50 Tg = R1[WS(rs, 1)];
Chris@42 51 Th = R1[WS(rs, 3)];
Chris@42 52 Ti = R1[WS(rs, 5)];
Chris@42 53 T4 = T2 + T3;
Chris@42 54 Tm = T3 - T2;
Chris@42 55 T6 = R0[WS(rs, 3)];
Chris@42 56 Ty = Ti - Th;
Chris@42 57 Tj = Th + Ti;
Chris@42 58 Tp = FNMS(KP500000000, T4, T1);
/* T5 = R0[0] + R0[2] + R0[4] (indices scaled by rs). */
Chris@42 59 T5 = T1 + T4;
Chris@42 60 T7 = R0[WS(rs, 5)];
Chris@42 61 Tk = FNMS(KP500000000, Tj, Tg);
/* Tt = R1[1] + R1[3] + R1[5] (indices scaled by rs). */
Chris@42 62 Tt = Tg + Tj;
Chris@42 63 T8 = R0[WS(rs, 1)];
Chris@42 64 Tb = R1[WS(rs, 4)];
Chris@42 65 Tc = R1[0];
Chris@42 66 Td = R1[WS(rs, 2)];
Chris@42 67 T9 = T7 + T8;
Chris@42 68 Tn = T8 - T7;
Chris@42 69 }
Chris@42 70 {
Chris@42 71 E Te, Tz, To, TC;
/* Combine the pairwise differences computed during the load phase. */
Chris@42 72 Te = Tc + Td;
Chris@42 73 Tz = Td - Tc;
Chris@42 74 To = Tm - Tn;
Chris@42 75 TC = Tm + Tn;
Chris@42 76 {
Chris@42 77 E Ta, Tq, TA, TB;
/* Ta = R0[3] + R0[5] + R0[1] (indices scaled by rs). */
Chris@42 78 Ta = T6 + T9;
Chris@42 79 Tq = FNMS(KP500000000, T9, T6);
Chris@42 80 TA = Ty - Tz;
Chris@42 81 TB = Ty + Tz;
Chris@42 82 {
Chris@42 83 E Tf, Tu, Tx, Tr;
Chris@42 84 Tf = FNMS(KP500000000, Te, Tb);
/* Tu = R1[4] + R1[0] + R1[2] (indices scaled by rs). */
Chris@42 85 Tu = Tb + Te;
Chris@42 86 Tx = Tp - Tq;
Chris@42 87 Tr = Tp + Tq;
Chris@42 88 {
Chris@42 89 E Tv, Tw, Tl, Ts;
/* Output phase: every store to Cr and Ci is in this innermost block.
   Ci[2] and Ci[4] are plain products with KP866025403; Cr[1], Cr[5],
   Ci[1] and Ci[5] go through FMA / FNMS; the rest are sums or differences. */
Chris@42 90 Tv = T5 + Ta;
Chris@42 91 Cr[WS(csr, 3)] = T5 - Ta;
Chris@42 92 Ci[WS(csi, 4)] = KP866025403 * (TC + TB);
Chris@42 93 Ci[WS(csi, 2)] = KP866025403 * (TB - TC);
Chris@42 94 Tw = Tt + Tu;
Chris@42 95 Ci[WS(csi, 3)] = Tt - Tu;
Chris@42 96 Tl = Tf - Tk;
Chris@42 97 Ts = Tk + Tf;
Chris@42 98 Cr[WS(csr, 1)] = FMA(KP866025403, TA, Tx);
Chris@42 99 Cr[WS(csr, 5)] = FNMS(KP866025403, TA, Tx);
/* Tv + Tw adds up all twelve loaded inputs; Tv - Tw is the R0 total
   minus the R1 total. */
Chris@42 100 Cr[0] = Tv + Tw;
Chris@42 101 Cr[WS(csr, 6)] = Tv - Tw;
Chris@42 102 Cr[WS(csr, 4)] = Tr + Ts;
Chris@42 103 Cr[WS(csr, 2)] = Tr - Ts;
Chris@42 104 Ci[WS(csi, 5)] = FNMS(KP866025403, To, Tl);
Chris@42 105 Ci[WS(csi, 1)] = FMA(KP866025403, To, Tl);
Chris@42 106 }
Chris@42 107 }
Chris@42 108 }
Chris@42 109 }
Chris@42 110 }
Chris@42 111 }
Chris@42 112 }
Chris@42 113
/* Descriptor handed to X(kr2c_register) together with the HAVE_FMA kernel.
   Fields as written: 12 (same value as the generator's "-n 12"), the
   codelet name string, and a four-entry tuple whose first three values
   (30, 2, 8) match the "30 additions, 2 multiplications, 8 fused
   multiply/add" figures in this variant's generated header comment.
   GENUS and the kr2c_desc layout are defined outside this listing. */
Chris@42 114 static const kr2c_desc desc = { 12, "r2cf_12", {30, 2, 8, 0}, &GENUS };
Chris@42 115
/* Registration entry point for the HAVE_FMA build: passes the planner p,
   the r2cf_12 kernel and its descriptor to X(kr2c_register).
   NOTE(review): X() is a project macro defined elsewhere, presumably a
   symbol-prefixing wrapper; confirm in the project headers. */
Chris@42 116 void X(codelet_r2cf_12) (planner *p) {
Chris@42 117 X(kr2c_register) (p, r2cf_12, &desc);
Chris@42 118 }
Chris@42 119
Chris@42 120 #else /* HAVE_FMA */
Chris@42 121
Chris@42 122 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cf_12 -include r2cf.h */
Chris@42 123
Chris@42 124 /*
Chris@42 125 * This function contains 38 FP additions, 8 FP multiplications,
Chris@42 126 * (or, 34 additions, 4 multiplications, 4 fused multiply/add),
Chris@42 127 * 21 stack variables, 2 constants, and 24 memory accesses
Chris@42 128 */
Chris@42 129 #include "r2cf.h"
Chris@42 130
/*
 * r2cf_12 (variant compiled when HAVE_FMA is not defined): straight-line
 * kernel emitted by genfft's gen_r2cf with "-n 12" and without "-fma"
 * (see the generator command line above).
 * NOTE(review): the name suggests a size-12 real-to-complex forward
 * transform; confirm against the FFTW codelet documentation.
 *
 * Per loop iteration the body reads R0[WS(rs, k)] and R1[WS(rs, k)] for
 * k = 0..5, and stores Cr[WS(csr, k)] for k = 0..6 and Ci[WS(csi, k)] for
 * k = 1..5.  Ci[0] and Ci[WS(csi, 6)] are never written here.
 *
 * R0, R1    input pointers; each is advanced by ivs after every iteration
 * Cr, Ci    output pointers; each is advanced by ovs after every iteration
 * rs        stride handed to WS() when indexing R0 and R1
 * csr, csi  strides handed to WS() when indexing Cr and Ci respectively
 * v         iteration count; the body runs only while the counter is > 0
 * ivs, ovs  per-iteration pointer increments for the inputs / outputs
 *
 * R, E, INT, stride, DK, WS, FNMS and MAKE_VOLATILE_STRIDE come from
 * project headers that are not visible in this listing.
 */
Chris@42 131 static void r2cf_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 132 {
/* Two constants declared through DK(): numerically sqrt(3)/2 and 1/2. */
Chris@42 133 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 134 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 135 {
Chris@42 136 INT i;
/* Counts i down from v; the increment clause also steps all four data
   pointers and invokes MAKE_VOLATILE_STRIDE on each stride (that macro's
   effect is defined elsewhere). */
Chris@42 137 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) {
/* Results of the four load groups below, consumed by the output stores. */
Chris@42 138 E T5, Tp, Tb, Tn, Ty, Tt, Ta, Tq, Tc, Ti, Tz, Tu, Td, To;
/* Each of the next four blocks loads three inputs and produces: the sum
   of all three, an FNMS combination of the first input with the sum of
   the other two, and the difference of the other two.
   NOTE(review): FNMS(a, b, c) presumably evaluates c - a * b; confirm in
   the project headers. */
Chris@42 139 {
Chris@42 140 E T1, T2, T3, T4;
/* Group 1: R0 at indices 0, 2, 4. */
Chris@42 141 T1 = R0[0];
Chris@42 142 T2 = R0[WS(rs, 2)];
Chris@42 143 T3 = R0[WS(rs, 4)];
Chris@42 144 T4 = T2 + T3;
Chris@42 145 T5 = T1 + T4;
Chris@42 146 Tp = FNMS(KP500000000, T4, T1);
Chris@42 147 Tb = T3 - T2;
Chris@42 148 }
Chris@42 149 {
Chris@42 150 E Tj, Tk, Tl, Tm;
/* Group 2: R1 at indices 1, 3, 5. */
Chris@42 151 Tj = R1[WS(rs, 1)];
Chris@42 152 Tk = R1[WS(rs, 3)];
Chris@42 153 Tl = R1[WS(rs, 5)];
Chris@42 154 Tm = Tk + Tl;
Chris@42 155 Tn = FNMS(KP500000000, Tm, Tj);
Chris@42 156 Ty = Tl - Tk;
Chris@42 157 Tt = Tj + Tm;
Chris@42 158 }
Chris@42 159 {
Chris@42 160 E T6, T7, T8, T9;
/* Group 3: R0 at indices 3, 5, 1. */
Chris@42 161 T6 = R0[WS(rs, 3)];
Chris@42 162 T7 = R0[WS(rs, 5)];
Chris@42 163 T8 = R0[WS(rs, 1)];
Chris@42 164 T9 = T7 + T8;
Chris@42 165 Ta = T6 + T9;
Chris@42 166 Tq = FNMS(KP500000000, T9, T6);
Chris@42 167 Tc = T8 - T7;
Chris@42 168 }
Chris@42 169 {
Chris@42 170 E Te, Tf, Tg, Th;
/* Group 4: R1 at indices 4, 0, 2. */
Chris@42 171 Te = R1[WS(rs, 4)];
Chris@42 172 Tf = R1[0];
Chris@42 173 Tg = R1[WS(rs, 2)];
Chris@42 174 Th = Tf + Tg;
Chris@42 175 Ti = FNMS(KP500000000, Th, Te);
Chris@42 176 Tz = Tg - Tf;
Chris@42 177 Tu = Te + Th;
Chris@42 178 }
/* Output phase.  Cr[3] and Ci[3] are plain differences of the group sums. */
Chris@42 179 Cr[WS(csr, 3)] = T5 - Ta;
Chris@42 180 Ci[WS(csi, 3)] = Tt - Tu;
/* Ci[1] and Ci[5] share the scaled term Td and the difference To. */
Chris@42 181 Td = KP866025403 * (Tb - Tc);
Chris@42 182 To = Ti - Tn;
Chris@42 183 Ci[WS(csi, 1)] = Td + To;
Chris@42 184 Ci[WS(csi, 5)] = To - Td;
Chris@42 185 {
Chris@42 186 E Tx, TA, Tv, Tw;
/* Cr[1] and Cr[5] share Tx and the scaled term TA. */
Chris@42 187 Tx = Tp - Tq;
Chris@42 188 TA = KP866025403 * (Ty - Tz);
Chris@42 189 Cr[WS(csr, 5)] = Tx - TA;
Chris@42 190 Cr[WS(csr, 1)] = Tx + TA;
/* Tv is the total of the six R0 inputs, Tw the total of the six R1 inputs;
   Cr[0] is their sum and Cr[6] their difference. */
Chris@42 191 Tv = T5 + Ta;
Chris@42 192 Tw = Tt + Tu;
Chris@42 193 Cr[WS(csr, 6)] = Tv - Tw;
Chris@42 194 Cr[0] = Tv + Tw;
Chris@42 195 }
Chris@42 196 {
Chris@42 197 E Tr, Ts, TB, TC;
Chris@42 198 Tr = Tp + Tq;
Chris@42 199 Ts = Tn + Ti;
Chris@42 200 Cr[WS(csr, 2)] = Tr - Ts;
Chris@42 201 Cr[WS(csr, 4)] = Tr + Ts;
/* Ci[2] and Ci[4] are KP866025403 times a difference / sum of TB and TC. */
Chris@42 202 TB = Ty + Tz;
Chris@42 203 TC = Tb + Tc;
Chris@42 204 Ci[WS(csi, 2)] = KP866025403 * (TB - TC);
Chris@42 205 Ci[WS(csi, 4)] = KP866025403 * (TC + TB);
Chris@42 206 }
Chris@42 207 }
Chris@42 208 }
Chris@42 209 }
Chris@42 210
/* Descriptor handed to X(kr2c_register) together with the non-FMA kernel.
   Fields as written: 12 (same value as the generator's "-n 12"), the
   codelet name string, and a four-entry tuple whose first three values
   (34, 4, 4) match the "34 additions, 4 multiplications, 4 fused
   multiply/add" figures in this variant's generated header comment.
   GENUS and the kr2c_desc layout are defined outside this listing. */
Chris@42 211 static const kr2c_desc desc = { 12, "r2cf_12", {34, 4, 4, 0}, &GENUS };
Chris@42 212
/* Registration entry point for the build without HAVE_FMA: passes the
   planner p, the r2cf_12 kernel and its descriptor to X(kr2c_register).
   NOTE(review): X() is a project macro defined elsewhere, presumably a
   symbol-prefixing wrapper; confirm in the project headers. */
Chris@42 213 void X(codelet_r2cf_12) (planner *p) {
Chris@42 214 X(kr2c_register) (p, r2cf_12, &desc);
Chris@42 215 }
Chris@42 216
Chris@42 217 #endif /* HAVE_FMA */