annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cfII_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:47:26 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cfII_12 -dft-II -include r2cfII.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 45 FP additions, 24 FP multiplications,
Chris@42 32 * (or, 21 additions, 0 multiplications, 24 fused multiply/add),
Chris@42 33 * 37 stack variables, 3 constants, and 24 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cfII.h"
Chris@42 36
/*
 * r2cfII_12 (HAVE_FMA variant): straight-line kernel emitted by genfft
 * with the options "-n 12 -name r2cfII_12 -dft-II" (see the "Generated by"
 * comment above).
 *
 * Each loop iteration reads six values from R0 and six from R1 (element k
 * is addressed as WS(rs, k), k = 0..5; element 0 is read as [0]) and stores
 * six values into Cr and six into Ci (addressed via WS(csr, k) and
 * WS(csi, k)).  The loop runs while the counter started at `v` is positive;
 * after every iteration R0 and R1 advance by `ivs` and Cr and Ci advance by
 * `ovs`.  The function returns nothing; its only visible effect is the
 * stores through Cr and Ci.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE come from project headers that are not visible here.
 * Comparing this body with the non-FMA variant in the #else branch suggests
 * FMA(a, b, c) = a*b + c, FMS(a, b, c) = a*b - c and FNMS(a, b, c) = c - a*b
 * -- confirm against the headers before relying on that.
 * NOTE(review): the mathematical definition of the "dft-II" transform is
 * fixed by the generator, not by anything visible in this file.
 */
Chris@42 37 static void r2cfII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
Chris@42 39 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
Chris@42 40 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 41 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@42 42 {
Chris@42 43 INT i;
/* The pointer bumps and the three MAKE_VOLATILE_STRIDE invocations all sit in the increment clause of the for statement. */
Chris@42 44 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) {
Chris@42 45 E TD, TB, Tp, T9, Tq, Tr, TE, To, Ts, TC; /* temporaries that outlive the nested scopes below */
Chris@42 46 {
Chris@42 47 E T8, T1, Tv, Tm, TF, Tz, Tl, Ta, Tb, Tt, TA, T4, Tc;
Chris@42 48 {
Chris@42 49 E Tx, Th, Ti, Tj, Ty, T6, T7, T2, T3, Tk;
/* Innermost scope: all twelve input loads, interleaved with the first sums and differences. */
Chris@42 50 Tx = R0[WS(rs, 3)];
Chris@42 51 T6 = R0[WS(rs, 5)];
Chris@42 52 T7 = R0[WS(rs, 1)];
Chris@42 53 Th = R1[WS(rs, 4)];
Chris@42 54 Ti = R1[WS(rs, 2)];
Chris@42 55 Tj = R1[0];
Chris@42 56 Ty = T6 + T7;
Chris@42 57 T8 = T6 - T7;
Chris@42 58 T1 = R0[0];
Chris@42 59 Tv = Ti - Tj - Th;
Chris@42 60 Tk = Ti - Tj;
Chris@42 61 Tm = Ti + Tj;
Chris@42 62 TF = Tx - Ty;
Chris@42 63 Tz = FMA(KP500000000, Ty, Tx);
Chris@42 64 T2 = R0[WS(rs, 2)];
Chris@42 65 T3 = R0[WS(rs, 4)];
Chris@42 66 Tl = FMA(KP500000000, Tk, Th);
Chris@42 67 Ta = R1[WS(rs, 1)];
Chris@42 68 Tb = R1[WS(rs, 3)];
Chris@42 69 Tt = T1 + T3 - T2;
Chris@42 70 TA = T3 + T2;
Chris@42 71 T4 = T2 - T3;
Chris@42 72 Tc = R1[WS(rs, 5)];
Chris@42 73 }
Chris@42 74 {
Chris@42 75 E Tn, Tg, T5, Tu;
Chris@42 76 TD = FNMS(KP866025403, TA, Tz);
Chris@42 77 TB = FMA(KP866025403, TA, Tz);
Chris@42 78 T5 = FMA(KP500000000, T4, T1);
Chris@42 79 Tu = Ta + Tc - Tb;
Chris@42 80 {
Chris@42 81 E Td, Tf, TG, Tw, Te;
Chris@42 82 Td = Tb - Tc;
Chris@42 83 Tf = Tc + Tb;
Chris@42 84 Tp = FMA(KP866025403, T8, T5);
Chris@42 85 T9 = FNMS(KP866025403, T8, T5);
Chris@42 86 TG = Tv - Tu;
Chris@42 87 Tw = Tu + Tv;
Chris@42 88 Te = FMA(KP500000000, Td, Ta);
Chris@42 89 Tq = FMA(KP866025403, Tm, Tl);
Chris@42 90 Tn = FNMS(KP866025403, Tm, Tl);
/* First four stores: Ci[1], Ci[4], Cr[4] and Cr[1] depend only on TG/TF and Tw/Tt. */
Chris@42 91 Ci[WS(csi, 1)] = FMA(KP707106781, TG, TF);
Chris@42 92 Ci[WS(csi, 4)] = FMS(KP707106781, TG, TF);
Chris@42 93 Cr[WS(csr, 4)] = FMA(KP707106781, Tw, Tt);
Chris@42 94 Cr[WS(csr, 1)] = FNMS(KP707106781, Tw, Tt);
Chris@42 95 Tg = FNMS(KP866025403, Tf, Te);
Chris@42 96 Tr = FMA(KP866025403, Tf, Te);
Chris@42 97 }
Chris@42 98 TE = Tg + Tn;
Chris@42 99 To = Tg - Tn;
Chris@42 100 }
Chris@42 101 }
/* Remaining eight stores, built from the temporaries declared at loop scope. */
Chris@42 102 Ci[WS(csi, 2)] = FMS(KP707106781, TE, TD);
Chris@42 103 Ci[WS(csi, 3)] = FMA(KP707106781, TE, TD);
Chris@42 104 Cr[0] = FMA(KP707106781, To, T9);
Chris@42 105 Cr[WS(csr, 5)] = FNMS(KP707106781, To, T9);
Chris@42 106 Ts = Tq - Tr;
Chris@42 107 TC = Tr + Tq;
Chris@42 108 Ci[0] = -(FMA(KP707106781, TC, TB)); /* the only output stored with an explicit negation */
Chris@42 109 Ci[WS(csi, 5)] = FNMS(KP707106781, TC, TB);
Chris@42 110 Cr[WS(csr, 2)] = FMA(KP707106781, Ts, Tp);
Chris@42 111 Cr[WS(csr, 3)] = FNMS(KP707106781, Ts, Tp);
Chris@42 112 }
Chris@42 113 }
Chris@42 114 }
Chris@42 115
/*
 * Descriptor passed (by address) to X(kr2c_register) together with the
 * HAVE_FMA kernel.  The leading 12 and the name string mirror the
 * "-n 12 -name r2cfII_12" generator options, and the first three entries of
 * {21, 0, 24, 0} match the counts quoted in this variant's header comment
 * (21 additions, 0 multiplications, 24 fused multiply/adds).
 * NOTE(review): the field layout of kr2c_desc, the meaning of the final 0
 * and the definition of GENUS live in headers not visible here -- confirm.
 */
Chris@42 116 static const kr2c_desc desc = { 12, "r2cfII_12", {21, 0, 24, 0}, &GENUS };
Chris@42 117
/*
 * Registration entry point for the HAVE_FMA build: forwards the planner `p`,
 * the r2cfII_12 kernel and the address of `desc` to X(kr2c_register).
 * NOTE(review): X(...) is a project macro (presumably a symbol-name
 * wrapper) and kr2c_register is defined elsewhere -- confirm there what
 * registration does with these arguments.
 */
Chris@42 118 void X(codelet_r2cfII_12) (planner *p) {
Chris@42 119 X(kr2c_register) (p, r2cfII_12, &desc);
Chris@42 120 }
Chris@42 121
Chris@42 122 #else /* HAVE_FMA */
Chris@42 123
Chris@42 124 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 12 -name r2cfII_12 -dft-II -include r2cfII.h */
Chris@42 125
Chris@42 126 /*
Chris@42 127 * This function contains 43 FP additions, 12 FP multiplications,
Chris@42 128 * (or, 39 additions, 8 multiplications, 4 fused multiply/add),
Chris@42 129 * 28 stack variables, 5 constants, and 24 memory accesses
Chris@42 130 */
Chris@42 131 #include "r2cfII.h"
Chris@42 132
/*
 * r2cfII_12 (non-FMA variant, compiled when HAVE_FMA is not defined):
 * straight-line kernel emitted by genfft with the options
 * "-n 12 -name r2cfII_12 -dft-II" (see the "Generated by" comment above).
 *
 * Each loop iteration reads six values from R0 and six from R1 (element k
 * is addressed as WS(rs, k), k = 0..5; element 0 is read as [0]) and stores
 * six values into Cr and six into Ci (addressed via WS(csr, k) and
 * WS(csi, k)).  The loop runs while the counter started at `v` is positive;
 * after every iteration R0 and R1 advance by `ivs` and Cr and Ci advance by
 * `ovs`.  The function returns nothing; its only visible effect is the
 * stores through Cr and Ci.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from project headers that are not visible here;
 * FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c) presumably c - a*b
 * -- confirm against the headers.
 * NOTE(review): the mathematical definition of the "dft-II" transform is
 * fixed by the generator, not by anything visible in this file.
 */
Chris@42 133 static void r2cfII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 134 {
Chris@42 135 DK(KP353553390, +0.353553390593273762200422181052424519642417969); /* numerically sqrt(2)/4 */
Chris@42 136 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
Chris@42 137 DK(KP612372435, +0.612372435695794524549321018676472847991486870); /* numerically sqrt(6)/4 */
Chris@42 138 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@42 139 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 140 {
Chris@42 141 INT i;
/* The pointer bumps and the three MAKE_VOLATILE_STRIDE invocations all sit in the increment clause of the for statement. */
Chris@42 142 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) {
Chris@42 143 E Tx, Tg, T4, Tz, Ty, Tj, TA, T9, Tm, Tl, Te, Tp, To, Tf, TE;
Chris@42 144 E TF;
Chris@42 145 {
Chris@42 146 E T1, T3, T2, Th, Ti;
/* Combine the six R0 inputs: elements 0, 2, 4 first, then 3, 5, 1. */
Chris@42 147 T1 = R0[0];
Chris@42 148 T3 = R0[WS(rs, 2)];
Chris@42 149 T2 = R0[WS(rs, 4)];
Chris@42 150 Tx = KP866025403 * (T2 + T3);
Chris@42 151 Tg = FMA(KP500000000, T3 - T2, T1);
Chris@42 152 T4 = T1 + T2 - T3;
Chris@42 153 Tz = R0[WS(rs, 3)];
Chris@42 154 Th = R0[WS(rs, 5)];
Chris@42 155 Ti = R0[WS(rs, 1)];
Chris@42 156 Ty = Th + Ti;
Chris@42 157 Tj = KP866025403 * (Th - Ti);
Chris@42 158 TA = FMA(KP500000000, Ty, Tz);
Chris@42 159 }
Chris@42 160 {
Chris@42 161 E T5, T6, T7, T8;
/* Combine R1 elements 1, 5 and 3. */
Chris@42 162 T5 = R1[WS(rs, 1)];
Chris@42 163 T6 = R1[WS(rs, 5)];
Chris@42 164 T7 = R1[WS(rs, 3)];
Chris@42 165 T8 = T6 - T7;
Chris@42 166 T9 = T5 + T8;
Chris@42 167 Tm = KP612372435 * (T6 + T7);
Chris@42 168 Tl = FNMS(KP353553390, T8, KP707106781 * T5);
Chris@42 169 }
Chris@42 170 {
Chris@42 171 E Td, Ta, Tb, Tc;
/* Combine R1 elements 4, 2 and 0. */
Chris@42 172 Td = R1[WS(rs, 4)];
Chris@42 173 Ta = R1[WS(rs, 2)];
Chris@42 174 Tb = R1[0];
Chris@42 175 Tc = Ta - Tb;
Chris@42 176 Te = Tc - Td;
Chris@42 177 Tp = FMA(KP353553390, Tc, KP707106781 * Td);
Chris@42 178 To = KP612372435 * (Ta + Tb);
Chris@42 179 }
/* Outputs 1 and 4 of Cr and Ci are built from T4, T9, Te, Tz and Ty only. */
Chris@42 180 Tf = KP707106781 * (T9 + Te);
Chris@42 181 Cr[WS(csr, 1)] = T4 - Tf;
Chris@42 182 Cr[WS(csr, 4)] = T4 + Tf;
Chris@42 183 TE = KP707106781 * (Te - T9);
Chris@42 184 TF = Tz - Ty;
Chris@42 185 Ci[WS(csi, 4)] = TE - TF;
Chris@42 186 Ci[WS(csi, 1)] = TE + TF;
Chris@42 187 {
Chris@42 188 E Tk, TB, Tr, Tw, Tn, Tq;
/* This scope produces Cr[5], Ci[2], Cr[0] and Ci[3]. */
Chris@42 189 Tk = Tg - Tj;
Chris@42 190 TB = Tx - TA;
Chris@42 191 Tn = Tl - Tm;
Chris@42 192 Tq = To - Tp;
Chris@42 193 Tr = Tn + Tq;
Chris@42 194 Tw = Tn - Tq;
Chris@42 195 Cr[WS(csr, 5)] = Tk - Tr;
Chris@42 196 Ci[WS(csi, 2)] = Tw + TB;
Chris@42 197 Cr[0] = Tk + Tr;
Chris@42 198 Ci[WS(csi, 3)] = Tw - TB;
Chris@42 199 }
Chris@42 200 {
Chris@42 201 E Ts, TD, Tv, TC, Tt, Tu;
/* This scope produces Cr[3], Ci[5], Cr[2] and Ci[0]. */
Chris@42 202 Ts = Tg + Tj;
Chris@42 203 TD = Tx + TA;
Chris@42 204 Tt = To + Tp;
Chris@42 205 Tu = Tm + Tl;
Chris@42 206 Tv = Tt - Tu;
Chris@42 207 TC = Tu + Tt;
Chris@42 208 Cr[WS(csr, 3)] = Ts - Tv;
Chris@42 209 Ci[WS(csi, 5)] = TD - TC;
Chris@42 210 Cr[WS(csr, 2)] = Ts + Tv;
Chris@42 211 Ci[0] = -(TC + TD); /* the only output stored with an explicit negation */
Chris@42 212 }
Chris@42 213 }
Chris@42 214 }
Chris@42 215 }
Chris@42 216
/*
 * Descriptor passed (by address) to X(kr2c_register) together with the
 * non-FMA kernel.  The leading 12 and the name string mirror the
 * "-n 12 -name r2cfII_12" generator options, and the first three entries of
 * {39, 8, 4, 0} match the counts quoted in this variant's header comment
 * (39 additions, 8 multiplications, 4 fused multiply/adds).
 * NOTE(review): the field layout of kr2c_desc, the meaning of the final 0
 * and the definition of GENUS live in headers not visible here -- confirm.
 */
Chris@42 217 static const kr2c_desc desc = { 12, "r2cfII_12", {39, 8, 4, 0}, &GENUS };
Chris@42 218
/*
 * Registration entry point for the non-FMA build: forwards the planner `p`,
 * the r2cfII_12 kernel and the address of `desc` to X(kr2c_register).
 * NOTE(review): X(...) is a project macro (presumably a symbol-name
 * wrapper) and kr2c_register is defined elsewhere -- confirm there what
 * registration does with these arguments.
 */
Chris@42 219 void X(codelet_r2cfII_12) (planner *p) {
Chris@42 220 X(kr2c_register) (p, r2cfII_12, &desc);
Chris@42 221 }
Chris@42 222
Chris@42 223 #endif /* HAVE_FMA */