annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cfII_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:43 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cfII_9 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 42 FP additions, 34 FP multiplications,
Chris@82 32 * (or, 12 additions, 4 multiplications, 30 fused multiply/add),
Chris@82 33 * 48 stack variables, 17 constants, and 18 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cfII.h"
Chris@82 36
/*
 * r2cfII_9 -- variant selected when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each loop iteration reads nine values:
 *   R0[0], R0[WS(rs, 1)] .. R0[WS(rs, 4)], R1[0], R1[WS(rs, 1)] .. R1[WS(rs, 3)]
 * and writes nine values:
 *   Cr[0], Cr[WS(csr, 1)] .. Cr[WS(csr, 4)], Ci[0], Ci[WS(csi, 1)] .. Ci[WS(csi, 3)].
 *
 * The loop runs while the counter (initialised from v) is > 0, so nothing is
 * read or written when v <= 0. After each iteration R0 and R1 advance by ivs
 * and Cr and Ci advance by ovs.
 *
 * DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are not defined in this file.
 * NOTE(review): presumably FMA(a, b, c) is a * b + c and FNMS(a, b, c) is
 * c - a * b -- confirm against the included headers.
 */
Chris@82 37 static void r2cfII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants used by the arithmetic below, declared through DK. */
Chris@82 39 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@82 40 DK(KP879385241, +0.879385241571816768108218554649462939872416269);
Chris@82 41 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 42 DK(KP898197570, +0.898197570222573798468955502359086394667167570);
Chris@82 43 DK(KP673648177, +0.673648177666930348851716626769314796000375677);
Chris@82 44 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@82 45 DK(KP907603734, +0.907603734547952313649323976213898122064543220);
Chris@82 46 DK(KP666666666, +0.666666666666666666666666666666666666666666667);
Chris@82 47 DK(KP826351822, +0.826351822333069651148283373230685203999624323);
Chris@82 48 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 49 DK(KP315207469, +0.315207469095904627298647952427796244129086440);
Chris@82 50 DK(KP420276625, +0.420276625461206169731530603237061658838781920);
Chris@82 51 DK(KP203604859, +0.203604859554852403062088995281827210665664861);
Chris@82 52 DK(KP152703644, +0.152703644666139302296566746461370407999248646);
Chris@82 53 DK(KP726681596, +0.726681596905677465811651808188092531873167623);
Chris@82 54 DK(KP968908795, +0.968908795874236621082202410917456709164223497);
Chris@82 55 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 56 {
Chris@82 57 INT i;
Chris@82 58 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) {
Chris@82 59 E T1, T4, To, Ta, Tm, TB, Tq, Tt, Tf, Tj, TA, Tr, Ts, T2, T3;
Chris@82 60 E T5, Tg;
/* First group of inputs: R0[0], R0[WS(rs, 3)], R1[WS(rs, 1)]. */
Chris@82 61 T1 = R0[0];
Chris@82 62 T2 = R0[WS(rs, 3)];
Chris@82 63 T3 = R1[WS(rs, 1)];
Chris@82 64 T4 = T2 - T3;
Chris@82 65 To = T2 + T3;
Chris@82 66 {
/* Second group of inputs: R0[WS(rs, 1)], R0[WS(rs, 4)], R1[WS(rs, 2)]. */
Chris@82 67 E T6, T9, Tk, T7, T8, Tl;
Chris@82 68 T6 = R0[WS(rs, 1)];
Chris@82 69 T7 = R0[WS(rs, 4)];
Chris@82 70 T8 = R1[WS(rs, 2)];
Chris@82 71 T9 = T7 - T8;
Chris@82 72 Tk = T7 + T8;
Chris@82 73 Ta = T6 + T9;
Chris@82 74 Tl = FNMS(KP500000000, T9, T6);
Chris@82 75 Tm = FMA(KP968908795, Tl, Tk);
Chris@82 76 TB = FNMS(KP726681596, Tk, Tl);
Chris@82 77 Tq = FNMS(KP152703644, Tk, Tl);
Chris@82 78 Tt = FMA(KP203604859, Tl, Tk);
Chris@82 79 }
Chris@82 80 {
/* Third group of inputs: R0[WS(rs, 2)], R1[0], R1[WS(rs, 3)]. */
Chris@82 81 E Tb, Te, Ti, Tc, Td, Th;
Chris@82 82 Tb = R0[WS(rs, 2)];
Chris@82 83 Tc = R1[0];
Chris@82 84 Td = R1[WS(rs, 3)];
Chris@82 85 Te = Tc + Td;
Chris@82 86 Ti = Tc - Td;
Chris@82 87 Tf = Tb - Te;
Chris@82 88 Th = FMA(KP500000000, Te, Tb);
Chris@82 89 Tj = FNMS(KP152703644, Ti, Th);
Chris@82 90 TA = FMA(KP203604859, Th, Ti);
Chris@82 91 Tr = FNMS(KP420276625, Th, Ti);
Chris@82 92 Ts = FMA(KP315207469, Ti, Th);
Chris@82 93 }
/* These three outputs are built only from T1, T4, Ta and Tf. */
Chris@82 94 Ci[WS(csi, 1)] = KP866025403 * (Tf - Ta);
Chris@82 95 T5 = T1 + T4;
Chris@82 96 Tg = Ta + Tf;
Chris@82 97 Cr[WS(csr, 1)] = FNMS(KP500000000, Tg, T5);
Chris@82 98 Cr[WS(csr, 4)] = T5 + Tg;
Chris@82 99 {
/* Remaining imaginary outputs: Ci[WS(csi, 2)], Ci[0], Ci[WS(csi, 3)]. */
Chris@82 100 E Ty, Tx, Tz, Tn, TD, TC;
Chris@82 101 Tx = FNMS(KP826351822, Tr, Tq);
Chris@82 102 Ty = FNMS(KP666666666, Tx, Tt);
Chris@82 103 Tz = FMA(KP907603734, Ty, Ts);
Chris@82 104 Ci[WS(csi, 2)] = KP866025403 * (FNMS(KP939692620, Tz, To));
Chris@82 105 Tn = FMA(KP673648177, Tm, Tj);
Chris@82 106 TC = FNMS(KP898197570, TB, TA);
Chris@82 107 TD = FNMS(KP666666666, Tn, TC);
Chris@82 108 Ci[0] = -(KP984807753 * (FMA(KP879385241, To, Tn)));
Chris@82 109 Ci[WS(csi, 3)] = -(KP866025403 * (FMA(KP852868531, TD, To)));
Chris@82 110 {
/* Remaining real outputs: Cr[WS(csr, 3)], Cr[0], Cr[WS(csr, 2)]. */
Chris@82 111 E Tp, Tv, TF, TG, Tu, TE, Tw;
Chris@82 112 Tp = FNMS(KP500000000, T4, T1);
Chris@82 113 Tu = FNMS(KP907603734, Tt, Ts);
Chris@82 114 Tv = FNMS(KP666666666, Tu, Tr);
Chris@82 115 TE = FNMS(KP673648177, Tm, Tj);
Chris@82 116 TF = FMA(KP898197570, TB, TA);
Chris@82 117 TG = FMA(KP500000000, TF, TE);
Chris@82 118 Cr[WS(csr, 3)] = FNMS(KP852868531, TG, Tp);
Chris@82 119 Cr[0] = FMA(KP852868531, TF, Tp);
Chris@82 120 Tw = FMA(KP826351822, Tv, Tq);
Chris@82 121 Cr[WS(csr, 2)] = FNMS(KP852868531, Tw, Tp);
Chris@82 122 }
Chris@82 123 }
Chris@82 124 }
Chris@82 125 }
Chris@82 126 }
Chris@82 127
/*
 * Descriptor handed to X(kr2c_register) below. Its fields are 9, the name
 * string "r2cfII_9", the initializer {12, 4, 30, 0} and &GENUS. The first
 * three numbers of that initializer match the "12 additions, 4
 * multiplications, 30 fused multiply/add" counts stated in the generator's
 * comment for this variant.
 * NOTE(review): GENUS is not defined in this file; presumably it comes from
 * rdft/scalar/r2cfII.h -- confirm.
 */
Chris@82 128 static const kr2c_desc desc = { 9, "r2cfII_9", {12, 4, 30, 0}, &GENUS };
Chris@82 129
/* Passes r2cfII_9 and its descriptor to X(kr2c_register) for planner p. */
Chris@82 130 void X(codelet_r2cfII_9) (planner *p) {
Chris@82 131 X(kr2c_register) (p, r2cfII_9, &desc);
Chris@82 132 }
Chris@82 133
Chris@82 134 #else
Chris@82 135
Chris@82 136 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 9 -name r2cfII_9 -dft-II -include rdft/scalar/r2cfII.h */
Chris@82 137
Chris@82 138 /*
Chris@82 139 * This function contains 42 FP additions, 30 FP multiplications,
Chris@82 140 * (or, 25 additions, 13 multiplications, 17 fused multiply/add),
Chris@82 141 * 39 stack variables, 14 constants, and 18 memory accesses
Chris@82 142 */
Chris@82 143 #include "rdft/scalar/r2cfII.h"
Chris@82 144
/*
 * r2cfII_9 -- variant selected when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each loop iteration reads nine values:
 *   R0[0], R0[WS(rs, 1)] .. R0[WS(rs, 4)], R1[0], R1[WS(rs, 1)] .. R1[WS(rs, 3)]
 * and writes nine values:
 *   Cr[0], Cr[WS(csr, 1)] .. Cr[WS(csr, 4)], Ci[0], Ci[WS(csi, 1)] .. Ci[WS(csi, 3)].
 *
 * The loop runs while the counter (initialised from v) is > 0, so nothing is
 * read or written when v <= 0. After each iteration R0 and R1 advance by ivs
 * and Cr and Ci advance by ovs.
 *
 * DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are not defined in this file.
 * NOTE(review): presumably FMA(a, b, c) is a * b + c and FNMS(a, b, c) is
 * c - a * b -- confirm against the included headers.
 */
Chris@82 145 static void r2cfII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 146 {
/* Numeric constants used by the arithmetic below, declared through DK. */
Chris@82 147 DK(KP663413948, +0.663413948168938396205421319635891297216863310);
Chris@82 148 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@82 149 DK(KP556670399, +0.556670399226419366452912952047023132968291906);
Chris@82 150 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@82 151 DK(KP852868531, +0.852868531952443209628250963940074071936020296);
Chris@82 152 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@82 153 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 154 DK(KP150383733, +0.150383733180435296639271897612501926072238258);
Chris@82 155 DK(KP813797681, +0.813797681349373692844693217248393223289101568);
Chris@82 156 DK(KP342020143, +0.342020143325668733044099614682259580763083368);
Chris@82 157 DK(KP939692620, +0.939692620785908384054109277324731469936208134);
Chris@82 158 DK(KP296198132, +0.296198132726023843175338011893050938967728390);
Chris@82 159 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 160 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 161 {
Chris@82 162 INT i;
Chris@82 163 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) {
Chris@82 164 E T1, T4, To, Ta, Tl, Tk, Tf, Ti, Th, T2, T3, T5, Tg;
/* First group of inputs: R0[0], R1[WS(rs, 1)], R0[WS(rs, 3)]. */
Chris@82 165 T1 = R0[0];
Chris@82 166 T2 = R1[WS(rs, 1)];
Chris@82 167 T3 = R0[WS(rs, 3)];
Chris@82 168 T4 = T2 - T3;
Chris@82 169 To = T2 + T3;
Chris@82 170 {
/* Second group of inputs: R0[WS(rs, 1)], R1[WS(rs, 2)], R0[WS(rs, 4)]. */
Chris@82 171 E T6, T7, T8, T9;
Chris@82 172 T6 = R0[WS(rs, 1)];
Chris@82 173 T7 = R1[WS(rs, 2)];
Chris@82 174 T8 = R0[WS(rs, 4)];
Chris@82 175 T9 = T7 - T8;
Chris@82 176 Ta = T6 - T9;
Chris@82 177 Tl = T7 + T8;
Chris@82 178 Tk = FMA(KP500000000, T9, T6);
Chris@82 179 }
Chris@82 180 {
/* Third group of inputs: R0[WS(rs, 2)], R1[0], R1[WS(rs, 3)]. */
Chris@82 181 E Tb, Tc, Td, Te;
Chris@82 182 Tb = R0[WS(rs, 2)];
Chris@82 183 Tc = R1[0];
Chris@82 184 Td = R1[WS(rs, 3)];
Chris@82 185 Te = Tc + Td;
Chris@82 186 Tf = Tb - Te;
Chris@82 187 Ti = FMA(KP500000000, Te, Tb);
Chris@82 188 Th = Tc - Td;
Chris@82 189 }
/* These three outputs are built only from T1, T4, Ta and Tf. */
Chris@82 190 Ci[WS(csi, 1)] = KP866025403 * (Tf - Ta);
Chris@82 191 T5 = T1 - T4;
Chris@82 192 Tg = Ta + Tf;
Chris@82 193 Cr[WS(csr, 1)] = FNMS(KP500000000, Tg, T5);
Chris@82 194 Cr[WS(csr, 4)] = T5 + Tg;
Chris@82 195 {
/* Intermediate products of Th, Ti, Tk, Tl feeding the six remaining outputs. */
Chris@82 196 E Tr, Tt, Tw, Tv, Tu, Tp, Tq, Ts, Tj, Tm, Tn;
Chris@82 197 Tr = FMA(KP500000000, T4, T1);
Chris@82 198 Tt = FMA(KP296198132, Th, KP939692620 * Ti);
Chris@82 199 Tw = FNMS(KP813797681, Th, KP342020143 * Ti);
Chris@82 200 Tv = FNMS(KP984807753, Tk, KP150383733 * Tl);
Chris@82 201 Tu = FMA(KP173648177, Tk, KP852868531 * Tl);
Chris@82 202 Tp = FNMS(KP556670399, Tl, KP766044443 * Tk);
Chris@82 203 Tq = FMA(KP852868531, Th, KP173648177 * Ti);
Chris@82 204 Ts = Tp + Tq;
Chris@82 205 Tj = FNMS(KP984807753, Ti, KP150383733 * Th);
Chris@82 206 Tm = FMA(KP642787609, Tk, KP663413948 * Tl);
Chris@82 207 Tn = Tj - Tm;
/* Outputs at indices 0, 3 and 2 of Cr and Ci. */
Chris@82 208 Ci[0] = FNMS(KP866025403, To, Tn);
Chris@82 209 Cr[0] = Tr + Ts;
Chris@82 210 Ci[WS(csi, 3)] = FNMS(KP500000000, Tn, KP866025403 * ((Tp - Tq) - To));
Chris@82 211 Cr[WS(csr, 3)] = FMA(KP866025403, Tm + Tj, Tr) - (KP500000000 * Ts);
Chris@82 212 Ci[WS(csi, 2)] = FMA(KP866025403, To - (Tu + Tt), KP500000000 * (Tw - Tv));
Chris@82 213 Cr[WS(csr, 2)] = FMA(KP500000000, Tt - Tu, Tr) + (KP866025403 * (Tv + Tw));
Chris@82 214 }
Chris@82 215 }
Chris@82 216 }
Chris@82 217 }
Chris@82 218
/*
 * Descriptor handed to X(kr2c_register) below. Its fields are 9, the name
 * string "r2cfII_9", the initializer {25, 13, 17, 0} and &GENUS. The first
 * three numbers of that initializer match the "25 additions, 13
 * multiplications, 17 fused multiply/add" counts stated in the generator's
 * comment for this variant.
 * NOTE(review): GENUS is not defined in this file; presumably it comes from
 * rdft/scalar/r2cfII.h -- confirm.
 */
Chris@82 219 static const kr2c_desc desc = { 9, "r2cfII_9", {25, 13, 17, 0}, &GENUS };
Chris@82 220
/* Passes r2cfII_9 and its descriptor to X(kr2c_register) for planner p. */
Chris@82 221 void X(codelet_r2cfII_9) (planner *p) {
Chris@82 222 X(kr2c_register) (p, r2cfII_9, &desc);
Chris@82 223 }
Chris@82 224
Chris@82 225 #endif