annotate src/fftw-3.3.8/dft/simd/common/n1fv_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:51 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n1fv_14 -include dft/simd/n1f.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 74 FP additions, 48 FP multiplications,
Chris@82 32 * (or, 32 additions, 6 multiplications, 42 fused multiply/add),
Chris@82 33 * 51 stack variables, 6 constants, and 28 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/n1f.h"
Chris@82 36
/*
 * n1fv_14 (FMA variant): genfft-generated SIMD codelet for a transform of
 * size 14 (generator flags: -fma -simd -n 14).
 *
 * Reads 14 vector inputs from ri at element strides WS(is, 0..13) and writes
 * 14 vector outputs to ro at element strides WS(os, 0..13).  The outer loop
 * runs while i > 0, starting at i = v and stepping by VL, advancing the input
 * pointer by VL * ivs and the output pointer by VL * ovs each pass.
 *
 * The ii and io parameters are not referenced anywhere in this body.
 * NOTE(review): presumably the imaginary parts are reached through the V
 * vector type loaded from ri / stored to ro -- confirm against dft/simd/n1f.h.
 */
Chris@82 37 static void n1fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/*
 * Constants.  KP900968867 and KP974927912 numerically equal cos(pi/7) and
 * sin(3*pi/7).  The other four appear to be ratios of such sine/cosine
 * values (e.g. 0.356895867 ~= cos(3*pi/7) / cos(2*pi/7)) -- TODO confirm
 * against the generator.
 */
Chris@82 39 DVK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@82 40 DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@82 41 DVK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@82 42 DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@82 43 DVK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@82 44 DVK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@82 45 {
Chris@82 46 INT i;
Chris@82 47 const R *xi;
Chris@82 48 R *xo;
Chris@82 49 xi = ri;
Chris@82 50 xo = ro;
Chris@82 51 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
Chris@82 52 V T3, TH, Ts, TV, TW, Tt, Tu, TU, Ta, To, Th, Tp, TC, Tx, TK;
Chris@82 53 V TQ, TN, TR, T14, TZ, T1, T2;
/* Inputs are loaded in pairs (k, k+7 mod 14); each pair yields a difference
 * (VSUB) and a sum (VADD).  Here: inputs 0 and 7. */
Chris@82 54 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 55 T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 56 T3 = VSUB(T1, T2);
Chris@82 57 TH = VADD(T1, T2);
Chris@82 58 {
Chris@82 59 V T6, TI, T9, TJ, Tn, TP, Tk, TO, Tg, TM, Td, TL;
Chris@82 60 {
Chris@82 61 V T4, T5, Ti, Tj;
/* Remaining six pairs: (2,9), (12,5), (8,1), (6,13), (10,3), (4,11). */
Chris@82 62 T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 63 T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 64 T6 = VSUB(T4, T5);
Chris@82 65 TI = VADD(T4, T5);
Chris@82 66 {
Chris@82 67 V T7, T8, Tl, Tm;
Chris@82 68 T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 69 T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 70 T9 = VSUB(T7, T8);
Chris@82 71 TJ = VADD(T7, T8);
Chris@82 72 Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 73 Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 74 Tn = VSUB(Tl, Tm);
Chris@82 75 TP = VADD(Tl, Tm);
Chris@82 76 }
Chris@82 77 Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 78 Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 79 Tk = VSUB(Ti, Tj);
Chris@82 80 TO = VADD(Ti, Tj);
Chris@82 81 {
Chris@82 82 V Te, Tf, Tb, Tc;
Chris@82 83 Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 84 Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 85 Tg = VSUB(Te, Tf);
Chris@82 86 TM = VADD(Te, Tf);
Chris@82 87 Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 88 Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 89 Td = VSUB(Tb, Tc);
Chris@82 90 TL = VADD(Tb, Tc);
Chris@82 91 }
Chris@82 92 }
/* Second-level combinations.  Ts/Tt/Tu and Ta/To/Th (with Tp/TC/Tx) are built
 * only from the pair differences; TV/TW/TU and TK/TQ/TN (with TR/T14/TZ) are
 * built only from the pair sums. */
Chris@82 93 Ts = VSUB(T9, T6);
Chris@82 94 TV = VSUB(TL, TM);
Chris@82 95 TW = VSUB(TJ, TI);
Chris@82 96 Tt = VSUB(Tn, Tk);
Chris@82 97 Tu = VSUB(Tg, Td);
Chris@82 98 TU = VSUB(TO, TP);
Chris@82 99 Ta = VADD(T6, T9);
Chris@82 100 To = VADD(Tk, Tn);
Chris@82 101 Th = VADD(Td, Tg);
Chris@82 102 Tp = VFNMS(LDK(KP356895867), Ta, To);
Chris@82 103 TC = VFNMS(LDK(KP356895867), To, Th);
Chris@82 104 Tx = VFNMS(LDK(KP356895867), Th, Ta);
Chris@82 105 TK = VADD(TI, TJ);
Chris@82 106 TQ = VADD(TO, TP);
Chris@82 107 TN = VADD(TL, TM);
Chris@82 108 TR = VFNMS(LDK(KP356895867), TQ, TN);
Chris@82 109 T14 = VFNMS(LDK(KP356895867), TN, TK);
Chris@82 110 TZ = VFNMS(LDK(KP356895867), TK, TQ);
Chris@82 111 }
/* Output 7 is the plain sum of the difference terms; output 0 is the plain
 * sum of the sum terms. */
Chris@82 112 ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)]));
Chris@82 113 ST(&(xo[0]), VADD(TH, VADD(TK, VADD(TN, TQ))), ovs, &(xo[0]));
/* Each block below stores one output pair (k, 14 - k), produced from the same
 * two temporaries via VFMAI and VFNMSI.  Odd-indexed outputs use the
 * difference-derived terms, even-indexed outputs the sum-derived terms. */
Chris@82 114 {
Chris@82 115 V Tr, Tw, Tq, Tv;
Chris@82 116 Tq = VFNMS(LDK(KP692021471), Tp, Th);
Chris@82 117 Tr = VFNMS(LDK(KP900968867), Tq, T3);
Chris@82 118 Tv = VFMA(LDK(KP554958132), Tu, Tt);
Chris@82 119 Tw = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tv, Ts));
Chris@82 120 ST(&(xo[WS(os, 5)]), VFNMSI(Tw, Tr), ovs, &(xo[WS(os, 1)]));
Chris@82 121 ST(&(xo[WS(os, 9)]), VFMAI(Tw, Tr), ovs, &(xo[WS(os, 1)]));
Chris@82 122 }
Chris@82 123 {
Chris@82 124 V T16, T18, T15, T17;
Chris@82 125 T15 = VFNMS(LDK(KP692021471), T14, TQ);
Chris@82 126 T16 = VFNMS(LDK(KP900968867), T15, TH);
Chris@82 127 T17 = VFNMS(LDK(KP554958132), TU, TW);
Chris@82 128 T18 = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), T17, TV));
Chris@82 129 ST(&(xo[WS(os, 6)]), VFMAI(T18, T16), ovs, &(xo[0]));
Chris@82 130 ST(&(xo[WS(os, 8)]), VFNMSI(T18, T16), ovs, &(xo[0]));
Chris@82 131 }
Chris@82 132 {
Chris@82 133 V Tz, TB, Ty, TA;
Chris@82 134 Ty = VFNMS(LDK(KP692021471), Tx, To);
Chris@82 135 Tz = VFNMS(LDK(KP900968867), Ty, T3);
Chris@82 136 TA = VFMA(LDK(KP554958132), Tt, Ts);
Chris@82 137 TB = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), TA, Tu));
Chris@82 138 ST(&(xo[WS(os, 13)]), VFNMSI(TB, Tz), ovs, &(xo[WS(os, 1)]));
Chris@82 139 ST(&(xo[WS(os, 1)]), VFMAI(TB, Tz), ovs, &(xo[WS(os, 1)]));
Chris@82 140 }
Chris@82 141 {
Chris@82 142 V TT, TY, TS, TX;
Chris@82 143 TS = VFNMS(LDK(KP692021471), TR, TK);
Chris@82 144 TT = VFNMS(LDK(KP900968867), TS, TH);
Chris@82 145 TX = VFMA(LDK(KP554958132), TW, TV);
Chris@82 146 TY = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TX, TU));
Chris@82 147 ST(&(xo[WS(os, 4)]), VFMAI(TY, TT), ovs, &(xo[0]));
Chris@82 148 ST(&(xo[WS(os, 10)]), VFNMSI(TY, TT), ovs, &(xo[0]));
Chris@82 149 }
Chris@82 150 {
Chris@82 151 V T11, T13, T10, T12;
Chris@82 152 T10 = VFNMS(LDK(KP692021471), TZ, TN);
Chris@82 153 T11 = VFNMS(LDK(KP900968867), T10, TH);
Chris@82 154 T12 = VFMA(LDK(KP554958132), TV, TU);
Chris@82 155 T13 = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), T12, TW));
Chris@82 156 ST(&(xo[WS(os, 2)]), VFMAI(T13, T11), ovs, &(xo[0]));
Chris@82 157 ST(&(xo[WS(os, 12)]), VFNMSI(T13, T11), ovs, &(xo[0]));
Chris@82 158 }
Chris@82 159 {
Chris@82 160 V TE, TG, TD, TF;
Chris@82 161 TD = VFNMS(LDK(KP692021471), TC, Ta);
Chris@82 162 TE = VFNMS(LDK(KP900968867), TD, T3);
Chris@82 163 TF = VFNMS(LDK(KP554958132), Ts, Tu);
Chris@82 164 TG = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), TF, Tt));
Chris@82 165 ST(&(xo[WS(os, 11)]), VFNMSI(TG, TE), ovs, &(xo[WS(os, 1)]));
Chris@82 166 ST(&(xo[WS(os, 3)]), VFMAI(TG, TE), ovs, &(xo[WS(os, 1)]));
Chris@82 167 }
Chris@82 168 }
Chris@82 169 }
Chris@82 170 VLEAVE();
Chris@82 171 }
Chris@82 172
/*
 * Codelet descriptor for the FMA variant.  The leading 14 matches the -n 14
 * generator flag, and {32, 6, 42, 0} matches the operation counts in the
 * generated header comment (32 additions, 6 multiplications, 42 fused
 * multiply/adds).
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is not visible here -- see the kdft_desc definition.
 */
Chris@82 173 static const kdft_desc desc = { 14, XSIMD_STRING("n1fv_14"), {32, 6, 42, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 174
/*
 * Registration entry point (FMA variant): hands the n1fv_14 kernel and its
 * descriptor to the planner p through X(kdft_register).
 */
Chris@82 175 void XSIMD(codelet_n1fv_14) (planner *p) {
Chris@82 176 X(kdft_register) (p, n1fv_14, &desc);
Chris@82 177 }
Chris@82 178
Chris@82 179 #else
Chris@82 180
Chris@82 181 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 14 -name n1fv_14 -include dft/simd/n1f.h */
Chris@82 182
Chris@82 183 /*
Chris@82 184 * This function contains 74 FP additions, 36 FP multiplications,
Chris@82 185 * (or, 50 additions, 12 multiplications, 24 fused multiply/add),
Chris@82 186 * 33 stack variables, 6 constants, and 28 memory accesses
Chris@82 187 */
Chris@82 188 #include "dft/simd/n1f.h"
Chris@82 189
/*
 * n1fv_14 (non-FMA variant): genfft-generated SIMD codelet for a transform of
 * size 14 (generator flags: -simd -n 14, without -fma).
 *
 * Reads 14 vector inputs from ri at element strides WS(is, 0..13) and writes
 * 14 vector outputs to ro at element strides WS(os, 0..13).  The outer loop
 * runs while i > 0, starting at i = v and stepping by VL, advancing the input
 * pointer by VL * ivs and the output pointer by VL * ovs each pass.
 *
 * The ii and io parameters are not referenced anywhere in this body.
 * NOTE(review): presumably the imaginary parts are reached through the V
 * vector type loaded from ri / stored to ro -- confirm against dft/simd/n1f.h.
 */
Chris@82 190 static void n1fv_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 191 {
/*
 * Constants, numerically:
 *   KP900968867 = cos(pi/7)    KP433883739 = sin(pi/7)
 *   KP623489801 = cos(2*pi/7)  KP781831482 = sin(2*pi/7)
 *   KP222520933 = cos(3*pi/7)  KP974927912 = sin(3*pi/7)
 */
Chris@82 192 DVK(KP222520933, +0.222520933956314404288902564496794759466355569);
Chris@82 193 DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@82 194 DVK(KP623489801, +0.623489801858733530525004884004239810632274731);
Chris@82 195 DVK(KP433883739, +0.433883739117558120475768332848358754609990728);
Chris@82 196 DVK(KP781831482, +0.781831482468029808708444526674057750232334519);
Chris@82 197 DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@82 198 {
Chris@82 199 INT i;
Chris@82 200 const R *xi;
Chris@82 201 R *xo;
Chris@82 202 xi = ri;
Chris@82 203 xo = ro;
Chris@82 204 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
Chris@82 205 V T3, Ty, To, TK, Tr, TE, Ta, TJ, Tq, TB, Th, TL, Ts, TH, T1;
Chris@82 206 V T2;
/* Inputs are loaded in pairs (k, k+7 mod 14); each pair yields a difference
 * (VSUB) and a sum (VADD).  Here: inputs 0 and 7. */
Chris@82 207 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 208 T2 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 209 T3 = VSUB(T1, T2);
Chris@82 210 Ty = VADD(T1, T2);
/* Pairs (6,13) and (8,1): To/Tr come from the differences, TK/TE from the sums. */
Chris@82 211 {
Chris@82 212 V Tk, TC, Tn, TD;
Chris@82 213 {
Chris@82 214 V Ti, Tj, Tl, Tm;
Chris@82 215 Ti = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 216 Tj = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 217 Tk = VSUB(Ti, Tj);
Chris@82 218 TC = VADD(Ti, Tj);
Chris@82 219 Tl = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 220 Tm = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 221 Tn = VSUB(Tl, Tm);
Chris@82 222 TD = VADD(Tl, Tm);
Chris@82 223 }
Chris@82 224 To = VADD(Tk, Tn);
Chris@82 225 TK = VSUB(TC, TD);
Chris@82 226 Tr = VSUB(Tn, Tk);
Chris@82 227 TE = VADD(TC, TD);
Chris@82 228 }
/* Pairs (2,9) and (12,5): Ta/Tq come from the differences, TJ/TB from the sums. */
Chris@82 229 {
Chris@82 230 V T6, Tz, T9, TA;
Chris@82 231 {
Chris@82 232 V T4, T5, T7, T8;
Chris@82 233 T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 234 T5 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 235 T6 = VSUB(T4, T5);
Chris@82 236 Tz = VADD(T4, T5);
Chris@82 237 T7 = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 238 T8 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 239 T9 = VSUB(T7, T8);
Chris@82 240 TA = VADD(T7, T8);
Chris@82 241 }
Chris@82 242 Ta = VADD(T6, T9);
Chris@82 243 TJ = VSUB(TA, Tz);
Chris@82 244 Tq = VSUB(T9, T6);
Chris@82 245 TB = VADD(Tz, TA);
Chris@82 246 }
/* Pairs (4,11) and (10,3): Th/Ts come from the differences, TL/TH from the sums. */
Chris@82 247 {
Chris@82 248 V Td, TF, Tg, TG;
Chris@82 249 {
Chris@82 250 V Tb, Tc, Te, Tf;
Chris@82 251 Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 252 Tc = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 253 Td = VSUB(Tb, Tc);
Chris@82 254 TF = VADD(Tb, Tc);
Chris@82 255 Te = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 256 Tf = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 257 Tg = VSUB(Te, Tf);
Chris@82 258 TG = VADD(Te, Tf);
Chris@82 259 }
Chris@82 260 Th = VADD(Td, Tg);
Chris@82 261 TL = VSUB(TF, TG);
Chris@82 262 Ts = VSUB(Tg, Td);
Chris@82 263 TH = VADD(TF, TG);
Chris@82 264 }
/* Output 7 is the plain sum of the difference terms; output 0 is the plain
 * sum of the sum terms. */
Chris@82 265 ST(&(xo[WS(os, 7)]), VADD(T3, VADD(Ta, VADD(Th, To))), ovs, &(xo[WS(os, 1)]));
Chris@82 266 ST(&(xo[0]), VADD(Ty, VADD(TB, VADD(TH, TE))), ovs, &(xo[0]));
/* Each output pair (k, 14 - k) below is stored as the sum and difference of
 * one cosine-weighted term and one VBYI(...) sine-weighted term.  Odd-indexed
 * outputs use the difference-derived temporaries, even-indexed outputs the
 * sum-derived ones. */
Chris@82 267 {
Chris@82 268 V Tt, Tp, TP, TQ;
Chris@82 269 Tt = VBYI(VFNMS(LDK(KP781831482), Tr, VFNMS(LDK(KP433883739), Ts, VMUL(LDK(KP974927912), Tq))));
Chris@82 270 Tp = VFMA(LDK(KP623489801), To, VFNMS(LDK(KP900968867), Th, VFNMS(LDK(KP222520933), Ta, T3)));
Chris@82 271 ST(&(xo[WS(os, 5)]), VSUB(Tp, Tt), ovs, &(xo[WS(os, 1)]));
Chris@82 272 ST(&(xo[WS(os, 9)]), VADD(Tp, Tt), ovs, &(xo[WS(os, 1)]));
Chris@82 273 TP = VBYI(VFMA(LDK(KP974927912), TJ, VFMA(LDK(KP433883739), TL, VMUL(LDK(KP781831482), TK))));
Chris@82 274 TQ = VFMA(LDK(KP623489801), TE, VFNMS(LDK(KP900968867), TH, VFNMS(LDK(KP222520933), TB, Ty)));
Chris@82 275 ST(&(xo[WS(os, 2)]), VADD(TP, TQ), ovs, &(xo[0]));
Chris@82 276 ST(&(xo[WS(os, 12)]), VSUB(TQ, TP), ovs, &(xo[0]));
Chris@82 277 }
Chris@82 278 {
Chris@82 279 V Tv, Tu, TM, TI;
Chris@82 280 Tv = VBYI(VFMA(LDK(KP781831482), Tq, VFMA(LDK(KP974927912), Ts, VMUL(LDK(KP433883739), Tr))));
Chris@82 281 Tu = VFMA(LDK(KP623489801), Ta, VFNMS(LDK(KP900968867), To, VFNMS(LDK(KP222520933), Th, T3)));
Chris@82 282 ST(&(xo[WS(os, 13)]), VSUB(Tu, Tv), ovs, &(xo[WS(os, 1)]));
Chris@82 283 ST(&(xo[WS(os, 1)]), VADD(Tu, Tv), ovs, &(xo[WS(os, 1)]));
Chris@82 284 TM = VBYI(VFNMS(LDK(KP433883739), TK, VFNMS(LDK(KP974927912), TL, VMUL(LDK(KP781831482), TJ))));
Chris@82 285 TI = VFMA(LDK(KP623489801), TB, VFNMS(LDK(KP900968867), TE, VFNMS(LDK(KP222520933), TH, Ty)));
Chris@82 286 ST(&(xo[WS(os, 6)]), VSUB(TI, TM), ovs, &(xo[0]));
Chris@82 287 ST(&(xo[WS(os, 8)]), VADD(TM, TI), ovs, &(xo[0]));
Chris@82 288 }
Chris@82 289 {
Chris@82 290 V TO, TN, Tx, Tw;
Chris@82 291 TO = VBYI(VFMA(LDK(KP433883739), TJ, VFNMS(LDK(KP974927912), TK, VMUL(LDK(KP781831482), TL))));
Chris@82 292 TN = VFMA(LDK(KP623489801), TH, VFNMS(LDK(KP222520933), TE, VFNMS(LDK(KP900968867), TB, Ty)));
Chris@82 293 ST(&(xo[WS(os, 4)]), VSUB(TN, TO), ovs, &(xo[0]));
Chris@82 294 ST(&(xo[WS(os, 10)]), VADD(TO, TN), ovs, &(xo[0]));
Chris@82 295 Tx = VBYI(VFMA(LDK(KP433883739), Tq, VFNMS(LDK(KP781831482), Ts, VMUL(LDK(KP974927912), Tr))));
Chris@82 296 Tw = VFMA(LDK(KP623489801), Th, VFNMS(LDK(KP222520933), To, VFNMS(LDK(KP900968867), Ta, T3)));
Chris@82 297 ST(&(xo[WS(os, 11)]), VSUB(Tw, Tx), ovs, &(xo[WS(os, 1)]));
Chris@82 298 ST(&(xo[WS(os, 3)]), VADD(Tw, Tx), ovs, &(xo[WS(os, 1)]));
Chris@82 299 }
Chris@82 300 }
Chris@82 301 }
Chris@82 302 VLEAVE();
Chris@82 303 }
Chris@82 304
/*
 * Codelet descriptor for the non-FMA variant.  The leading 14 matches the
 * -n 14 generator flag, and {50, 12, 24, 0} matches the operation counts in
 * the generated header comment (50 additions, 12 multiplications, 24 fused
 * multiply/adds).
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is not visible here -- see the kdft_desc definition.
 */
Chris@82 305 static const kdft_desc desc = { 14, XSIMD_STRING("n1fv_14"), {50, 12, 24, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 306
/*
 * Registration entry point (non-FMA variant): hands the n1fv_14 kernel and
 * its descriptor to the planner p through X(kdft_register).
 */
Chris@82 307 void XSIMD(codelet_n1fv_14) (planner *p) {
Chris@82 308 X(kdft_register) (p, n1fv_14, &desc);
Chris@82 309 }
Chris@82 310
Chris@82 311 #endif