annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cbIII_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:44 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 42 FP additions, 20 FP multiplications,
Chris@82 32 * (or, 30 additions, 8 multiplications, 12 fused multiply/add),
Chris@82 33 * 25 stack variables, 4 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cbIII.h"
Chris@82 36
Chris@82 37 static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 { /* FMA variant. For each of v iterations: reads six entries each from Cr and Ci, writes six entries each to R0 and R1. NOTE(review): presumably the size-12 type-III backward real transform named by the genfft options above -- confirm against FFTW docs. */
Chris@82 39 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically 1/sqrt(2) */
Chris@82 40 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
Chris@82 41 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
Chris@82 42 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); /* exactly 2 */
Chris@82 43 {
Chris@82 44 INT i;
Chris@82 45 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { /* i counts down from v; each pass advances R0/R1 by ovs and Cr/Ci by ivs. MAKE_VOLATILE_STRIDE is defined outside this file. */
Chris@82 46 E T5, Tx, Tb, Te, Tw, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu; /* values carried from the four load blocks into the output stages below */
Chris@82 47 { /* load block 1: Cr entries 1, 5, 2 (indexed through WS with csr) */
Chris@82 48 E T1, T2, T3, T4;
Chris@82 49 T1 = Cr[WS(csr, 1)];
Chris@82 50 T2 = Cr[WS(csr, 5)];
Chris@82 51 T3 = Cr[WS(csr, 2)];
Chris@82 52 T4 = T2 + T3;
Chris@82 53 T5 = T1 + T4; /* sum of the three loaded Cr values */
Chris@82 54 Tx = T2 - T3;
Chris@82 55 Tb = FNMS(KP2_000000000, T1, T4); /* T4 - 2*T1, taking FNMS(a, b, c) as c - a*b (macro defined outside this file) */
Chris@82 56 }
Chris@82 57 { /* load block 2: Ci entries 1, 5, 2 (indexed through WS with csi) */
Chris@82 58 E Tq, Tc, Td, Tr;
Chris@82 59 Tq = Ci[WS(csi, 1)];
Chris@82 60 Tc = Ci[WS(csi, 5)];
Chris@82 61 Td = Ci[WS(csi, 2)];
Chris@82 62 Tr = Td - Tc;
Chris@82 63 Te = Tc + Td;
Chris@82 64 Tw = FMA(KP2_000000000, Tq, Tr); /* 2*Tq + Tr, taking FMA(a, b, c) as a*b + c (macro defined outside this file) */
Chris@82 65 Ts = Tq - Tr;
Chris@82 66 }
Chris@82 67 { /* load block 3: Cr entries 4, 0, 3; same pattern as load block 1 */
Chris@82 68 E T6, T7, T8, T9;
Chris@82 69 T6 = Cr[WS(csr, 4)];
Chris@82 70 T7 = Cr[0];
Chris@82 71 T8 = Cr[WS(csr, 3)];
Chris@82 72 T9 = T7 + T8;
Chris@82 73 Ta = T6 + T9; /* sum of the three loaded Cr values */
Chris@82 74 TA = T7 - T8;
Chris@82 75 Tg = FNMS(KP2_000000000, T6, T9); /* T9 - 2*T6 under the same FNMS reading */
Chris@82 76 }
Chris@82 77 { /* load block 4: Ci entries 4, 0, 3 */
Chris@82 78 E To, Th, Ti, Tn;
Chris@82 79 To = Ci[WS(csi, 4)];
Chris@82 80 Th = Ci[0];
Chris@82 81 Ti = Ci[WS(csi, 3)];
Chris@82 82 Tn = Ti - Th;
Chris@82 83 Tj = Th + Ti;
Chris@82 84 Tz = FMA(KP2_000000000, To, Tn); /* 2*To + Tn under the same FMA reading */
Chris@82 85 Tp = Tn - To;
Chris@82 86 }
Chris@82 87 R0[0] = KP2_000000000 * (T5 + Ta); /* first four outputs use only the plain sums T5, Ta, Ts, Tp (no sqrt(3) terms) */
Chris@82 88 R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
Chris@82 89 Tt = Tp - Ts;
Chris@82 90 Tu = T5 - Ta;
Chris@82 91 R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
Chris@82 92 R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
Chris@82 93 { /* output stage A: writes R0 entries 2, 5 and R1 entries 0, 3 */
Chris@82 94 E Tf, Tk, Tv, Ty, TB, TC;
Chris@82 95 Tf = FMA(KP1_732050807, Te, Tb);
Chris@82 96 Tk = FNMS(KP1_732050807, Tj, Tg);
Chris@82 97 Tv = Tk - Tf;
Chris@82 98 Ty = FMA(KP1_732050807, Tx, Tw);
Chris@82 99 TB = FNMS(KP1_732050807, TA, Tz);
Chris@82 100 TC = Ty + TB;
Chris@82 101 R0[WS(rs, 2)] = Tf + Tk;
Chris@82 102 R0[WS(rs, 5)] = TB - Ty;
Chris@82 103 R1[0] = KP707106781 * (Tv - TC);
Chris@82 104 R1[WS(rs, 3)] = KP707106781 * (Tv + TC);
Chris@82 105 }
Chris@82 106 { /* output stage B: same inputs as stage A with the FMA/FNMS roles swapped; writes R0 entries 4, 1 and R1 entries 2, 5 */
Chris@82 107 E Tl, Tm, TF, TD, TE, TG;
Chris@82 108 Tl = FNMS(KP1_732050807, Te, Tb);
Chris@82 109 Tm = FMA(KP1_732050807, Tj, Tg);
Chris@82 110 TF = Tl - Tm;
Chris@82 111 TD = FMA(KP1_732050807, TA, Tz);
Chris@82 112 TE = FNMS(KP1_732050807, Tx, Tw);
Chris@82 113 TG = TE + TD;
Chris@82 114 R0[WS(rs, 4)] = -(Tl + Tm);
Chris@82 115 R1[WS(rs, 2)] = KP707106781 * (TF + TG);
Chris@82 116 R0[WS(rs, 1)] = TD - TE;
Chris@82 117 R1[WS(rs, 5)] = KP707106781 * (TF - TG);
Chris@82 118 }
Chris@82 119 }
Chris@82 120 }
Chris@82 121 }
Chris@82 122
Chris@82 123 static const kr2c_desc desc = { 12, "r2cbIII_12", {30, 8, 12, 0}, &GENUS }; /* descriptor handed to kr2c_register below: 12, the codelet name, and {30, 8, 12, 0}, which matches the 30 additions / 8 multiplications / 12 fused multiply/add listed in the header comment; field meanings presumed -- confirm against the kr2c_desc declaration */
Chris@82 124
Chris@82 125 void X(codelet_r2cbIII_12) (planner *p) { /* entry point for this codelet; X() is a macro defined outside this file (presumably the library's name-prefixing macro -- confirm) */
Chris@82 126 X(kr2c_register) (p, r2cbIII_12, &desc); /* passes the FMA-variant kernel and its descriptor to kr2c_register for planner p */
Chris@82 127 }
Chris@82 128
Chris@82 129 #else
Chris@82 130
Chris@82 131 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 132
Chris@82 133 /*
Chris@82 134 * This function contains 42 FP additions, 20 FP multiplications,
Chris@82 135 * (or, 38 additions, 16 multiplications, 4 fused multiply/add),
Chris@82 136 * 25 stack variables, 4 constants, and 24 memory accesses
Chris@82 137 */
Chris@82 138 #include "rdft/scalar/r2cbIII.h"
Chris@82 139
Chris@82 140 static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 141 { /* Non-FMA variant. For each of v iterations: reads six entries each from Cr and Ci, writes six entries each to R0 and R1. NOTE(review): presumably the size-12 type-III backward real transform named by the genfft options above -- confirm against FFTW docs. */
Chris@82 142 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
Chris@82 143 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); /* exactly 2 */
Chris@82 144 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* exactly 1/2 */
Chris@82 145 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 146 {
Chris@82 147 INT i;
Chris@82 148 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { /* i counts down from v; each pass advances R0/R1 by ovs and Cr/Ci by ivs. MAKE_VOLATILE_STRIDE is defined outside this file. */
Chris@82 149 E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu; /* values carried from the four load blocks into the output stages below */
Chris@82 150 { /* load block 1: Cr entries 1, 5, 2 (indexed through WS with csr) */
Chris@82 151 E T1, T2, T3, T4;
Chris@82 152 T1 = Cr[WS(csr, 1)];
Chris@82 153 T2 = Cr[WS(csr, 5)];
Chris@82 154 T3 = Cr[WS(csr, 2)];
Chris@82 155 T4 = T2 + T3;
Chris@82 156 T5 = T1 + T4; /* sum of the three loaded Cr values */
Chris@82 157 Tw = KP866025403 * (T2 - T3);
Chris@82 158 Tb = FNMS(KP500000000, T4, T1); /* T1 - T4/2, taking FNMS(a, b, c) as c - a*b (macro defined outside this file) */
Chris@82 159 }
Chris@82 160 { /* load block 2: Ci entries 1, 5, 2 (indexed through WS with csi) */
Chris@82 161 E Tq, Tc, Td, Tr;
Chris@82 162 Tq = Ci[WS(csi, 1)];
Chris@82 163 Tc = Ci[WS(csi, 5)];
Chris@82 164 Td = Ci[WS(csi, 2)];
Chris@82 165 Tr = Td - Tc;
Chris@82 166 Te = KP866025403 * (Tc + Td);
Chris@82 167 Tx = FMA(KP500000000, Tr, Tq); /* Tr/2 + Tq, taking FMA(a, b, c) as a*b + c (macro defined outside this file) */
Chris@82 168 Ts = Tq - Tr;
Chris@82 169 }
Chris@82 170 { /* load block 3: Cr entries 4, 0, 3; same pattern as load block 1 */
Chris@82 171 E T6, T7, T8, T9;
Chris@82 172 T6 = Cr[WS(csr, 4)];
Chris@82 173 T7 = Cr[0];
Chris@82 174 T8 = Cr[WS(csr, 3)];
Chris@82 175 T9 = T7 + T8;
Chris@82 176 Ta = T6 + T9; /* sum of the three loaded Cr values */
Chris@82 177 TA = KP866025403 * (T7 - T8);
Chris@82 178 Tg = FNMS(KP500000000, T9, T6); /* T6 - T9/2 under the same FNMS reading */
Chris@82 179 }
Chris@82 180 { /* load block 4: Ci entries 4, 0, 3 */
Chris@82 181 E To, Th, Ti, Tn;
Chris@82 182 To = Ci[WS(csi, 4)];
Chris@82 183 Th = Ci[0];
Chris@82 184 Ti = Ci[WS(csi, 3)];
Chris@82 185 Tn = Ti - Th;
Chris@82 186 Tj = KP866025403 * (Th + Ti);
Chris@82 187 Tz = FMA(KP500000000, Tn, To); /* Tn/2 + To under the same FMA reading */
Chris@82 188 Tp = Tn - To;
Chris@82 189 }
Chris@82 190 R0[0] = KP2_000000000 * (T5 + Ta); /* first four outputs use only the plain sums T5, Ta, Ts, Tp (no sqrt(3)/2 terms) */
Chris@82 191 R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
Chris@82 192 Tt = Tp - Ts;
Chris@82 193 Tu = T5 - Ta;
Chris@82 194 R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
Chris@82 195 R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
Chris@82 196 { /* output stage A: writes R0 entries 2, 5 and R1 entries 0, 3 */
Chris@82 197 E Tf, Tk, Tv, Ty, TB, TC;
Chris@82 198 Tf = Tb - Te;
Chris@82 199 Tk = Tg + Tj;
Chris@82 200 Tv = Tf - Tk;
Chris@82 201 Ty = Tw + Tx;
Chris@82 202 TB = Tz - TA;
Chris@82 203 TC = Ty + TB;
Chris@82 204 R0[WS(rs, 2)] = -(KP2_000000000 * (Tf + Tk));
Chris@82 205 R0[WS(rs, 5)] = KP2_000000000 * (TB - Ty);
Chris@82 206 R1[0] = KP1_414213562 * (Tv - TC);
Chris@82 207 R1[WS(rs, 3)] = KP1_414213562 * (Tv + TC);
Chris@82 208 }
Chris@82 209 { /* output stage B: same inputs as stage A with the opposite signs on Te, Tj, TA, Tw; writes R0 entries 4, 1 and R1 entries 2, 5 */
Chris@82 210 E Tl, Tm, TF, TD, TE, TG;
Chris@82 211 Tl = Tb + Te;
Chris@82 212 Tm = Tg - Tj;
Chris@82 213 TF = Tm - Tl;
Chris@82 214 TD = TA + Tz;
Chris@82 215 TE = Tx - Tw;
Chris@82 216 TG = TE + TD;
Chris@82 217 R0[WS(rs, 4)] = KP2_000000000 * (Tl + Tm);
Chris@82 218 R1[WS(rs, 2)] = KP1_414213562 * (TF + TG);
Chris@82 219 R0[WS(rs, 1)] = KP2_000000000 * (TD - TE);
Chris@82 220 R1[WS(rs, 5)] = KP1_414213562 * (TF - TG);
Chris@82 221 }
Chris@82 222 }
Chris@82 223 }
Chris@82 224 }
Chris@82 225
Chris@82 226 static const kr2c_desc desc = { 12, "r2cbIII_12", {38, 16, 4, 0}, &GENUS }; /* descriptor handed to kr2c_register below: 12, the codelet name, and {38, 16, 4, 0}, which matches the 38 additions / 16 multiplications / 4 fused multiply/add listed in the header comment; field meanings presumed -- confirm against the kr2c_desc declaration */
Chris@82 227
Chris@82 228 void X(codelet_r2cbIII_12) (planner *p) { /* entry point for this codelet; X() is a macro defined outside this file (presumably the library's name-prefixing macro -- confirm) */
Chris@82 229 X(kr2c_register) (p, r2cbIII_12, &desc); /* passes the non-FMA-variant kernel and its descriptor to kr2c_register for planner p */
Chris@82 230 }
Chris@82 231
Chris@82 232 #endif