annotate src/fftw-3.3.8/dft/scalar/codelets/n1_7.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:10 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 7 -name n1_7 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 60 FP additions, 42 FP multiplications,
Chris@82 32 * (or, 18 additions, 0 multiplications, 42 fused multiply/add),
Chris@82 33 * 41 stack variables, 6 constants, and 28 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_7 (variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined): size-7 transform kernel on split real/imaginary arrays.
 *
 * ri, ii   real and imaginary input arrays (read only)
 * ro, io   real and imaginary output arrays (written)
 * is, os   input and output strides, used only through WS(is, k) / WS(os, k)
 * v        number of transforms to perform; the loop body runs v times
 * ivs, ovs amounts added to ri/ii and to ro/io after each transform
 *
 * Each pass reads elements 0 and WS(is, 1..6) of ri and ii and writes
 * elements 0 and WS(os, 1..6) of ro and io.
 *
 * NOTE(review): worked through by hand, the arithmetic matches the forward
 * 7-point DFT (exponent sign negative), assuming WS(s, k) addresses element k
 * at stride s, FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b. Those
 * macros are defined outside this file -- confirm against their definitions.
 */
Chris@82 37 static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/*
 * Numerically: KP974927912 ~ sin(3 pi/7), KP900968867 ~ cos(pi/7),
 * KP692021471 ~ cos(2 pi/7) / cos(pi/7), KP801937735 ~ sin(2 pi/7) / sin(3 pi/7),
 * KP554958132 ~ sin(pi/7) / sin(2 pi/7), KP356895867 ~ cos(3 pi/7) / cos(2 pi/7).
 */
Chris@82 39 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@82 40 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@82 41 DK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@82 42 DK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@82 43 DK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@82 44 DK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@82 45 {
Chris@82 46 INT i;
/* One transform per iteration; all four pointers advance in the loop header. */
Chris@82 47 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
Chris@82 48 E T1, Tz, T4, TI, Ta, TG, T7, TH, Tb, Tp, TT, TO, TJ, Tu, Tg;
Chris@82 49 E TB, Tm, TC, Tj, TA, Tn, Ts, TQ, TL, TD, Tx;
/* Element 0 is used on its own: T1 is its real part, Tz its imaginary part. */
Chris@82 50 T1 = ri[0];
Chris@82 51 Tz = ii[0];
Chris@82 52 {
Chris@82 53 E T2, T3, Te, Tf;
/* Real parts: elements k and 7-k are combined into a sum and a difference. */
Chris@82 54 T2 = ri[WS(is, 1)];
Chris@82 55 T3 = ri[WS(is, 6)];
Chris@82 56 T4 = T2 + T3;
Chris@82 57 TI = T3 - T2;
Chris@82 58 {
Chris@82 59 E T8, T9, T5, T6;
Chris@82 60 T8 = ri[WS(is, 3)];
Chris@82 61 T9 = ri[WS(is, 4)];
Chris@82 62 Ta = T8 + T9;
Chris@82 63 TG = T9 - T8;
Chris@82 64 T5 = ri[WS(is, 2)];
Chris@82 65 T6 = ri[WS(is, 5)];
Chris@82 66 T7 = T5 + T6;
Chris@82 67 TH = T6 - T5;
Chris@82 68 }
/* Innermost stage of the nested multiply-add chains over the real sums/differences. */
Chris@82 69 Tb = FNMS(KP356895867, T7, T4);
Chris@82 70 Tp = FNMS(KP356895867, T4, Ta);
Chris@82 71 TT = FMA(KP554958132, TG, TI);
Chris@82 72 TO = FMA(KP554958132, TH, TG);
Chris@82 73 TJ = FNMS(KP554958132, TI, TH);
Chris@82 74 Tu = FNMS(KP356895867, Ta, T7);
/* Imaginary parts: same pairing, but the differences are taken as k minus 7-k. */
Chris@82 75 Te = ii[WS(is, 2)];
Chris@82 76 Tf = ii[WS(is, 5)];
Chris@82 77 Tg = Te - Tf;
Chris@82 78 TB = Te + Tf;
Chris@82 79 {
Chris@82 80 E Tk, Tl, Th, Ti;
Chris@82 81 Tk = ii[WS(is, 3)];
Chris@82 82 Tl = ii[WS(is, 4)];
Chris@82 83 Tm = Tk - Tl;
Chris@82 84 TC = Tk + Tl;
Chris@82 85 Th = ii[WS(is, 1)];
Chris@82 86 Ti = ii[WS(is, 6)];
Chris@82 87 Tj = Th - Ti;
Chris@82 88 TA = Th + Ti;
Chris@82 89 }
/* Innermost stage of the chains over the imaginary sums/differences. */
Chris@82 90 Tn = FMA(KP554958132, Tm, Tj);
Chris@82 91 Ts = FMA(KP554958132, Tg, Tm);
Chris@82 92 TQ = FNMS(KP356895867, TB, TA);
Chris@82 93 TL = FNMS(KP356895867, TA, TC);
Chris@82 94 TD = FNMS(KP356895867, TC, TB);
Chris@82 95 Tx = FNMS(KP554958132, Tj, Tg);
Chris@82 96 }
/* Output 0 is the plain sum of all seven inputs (real and imaginary separately). */
Chris@82 97 ro[0] = T1 + T4 + T7 + Ta;
Chris@82 98 io[0] = Tz + TA + TB + TC;
/*
 * Outputs 1 and 6: both are built from the same two partial results and
 * differ only in the sign applied to the KP974927912 term (FMA vs FNMS).
 */
Chris@82 99 {
Chris@82 100 E To, Td, Tc, TU, TS, TR;
Chris@82 101 To = FMA(KP801937735, Tn, Tg);
Chris@82 102 Tc = FNMS(KP692021471, Tb, Ta);
Chris@82 103 Td = FNMS(KP900968867, Tc, T1);
Chris@82 104 ro[WS(os, 6)] = FNMS(KP974927912, To, Td);
Chris@82 105 ro[WS(os, 1)] = FMA(KP974927912, To, Td);
Chris@82 106 TU = FMA(KP801937735, TT, TH);
Chris@82 107 TR = FNMS(KP692021471, TQ, TC);
Chris@82 108 TS = FNMS(KP900968867, TR, Tz);
Chris@82 109 io[WS(os, 1)] = FMA(KP974927912, TU, TS);
Chris@82 110 io[WS(os, 6)] = FNMS(KP974927912, TU, TS);
Chris@82 111 }
/* Outputs 2 and 5, built the same way from a different rotation of the inputs. */
Chris@82 112 {
Chris@82 113 E Tt, Tr, Tq, TP, TN, TM;
Chris@82 114 Tt = FNMS(KP801937735, Ts, Tj);
Chris@82 115 Tq = FNMS(KP692021471, Tp, T7);
Chris@82 116 Tr = FNMS(KP900968867, Tq, T1);
Chris@82 117 ro[WS(os, 5)] = FNMS(KP974927912, Tt, Tr);
Chris@82 118 ro[WS(os, 2)] = FMA(KP974927912, Tt, Tr);
Chris@82 119 TP = FNMS(KP801937735, TO, TI);
Chris@82 120 TM = FNMS(KP692021471, TL, TB);
Chris@82 121 TN = FNMS(KP900968867, TM, Tz);
Chris@82 122 io[WS(os, 2)] = FMA(KP974927912, TP, TN);
Chris@82 123 io[WS(os, 5)] = FNMS(KP974927912, TP, TN);
Chris@82 124 }
/* Outputs 3 and 4. */
Chris@82 125 {
Chris@82 126 E Ty, Tw, Tv, TK, TF, TE;
Chris@82 127 Ty = FNMS(KP801937735, Tx, Tm);
Chris@82 128 Tv = FNMS(KP692021471, Tu, T4);
Chris@82 129 Tw = FNMS(KP900968867, Tv, T1);
Chris@82 130 ro[WS(os, 4)] = FNMS(KP974927912, Ty, Tw);
Chris@82 131 ro[WS(os, 3)] = FMA(KP974927912, Ty, Tw);
Chris@82 132 TK = FNMS(KP801937735, TJ, TG);
Chris@82 133 TE = FNMS(KP692021471, TD, TA);
Chris@82 134 TF = FNMS(KP900968867, TE, Tz);
Chris@82 135 io[WS(os, 3)] = FMA(KP974927912, TK, TF);
Chris@82 136 io[WS(os, 4)] = FNMS(KP974927912, TK, TF);
Chris@82 137 }
Chris@82 138 }
Chris@82 139 }
Chris@82 140 }
Chris@82 141
/*
 * Descriptor handed to X(kdft_register) together with n1_7 in this FMA branch.
 * 7 and "n1_7" match the -n and -name arguments on the generator command line,
 * and {18, 0, 42, 0} repeats the 18 additions / 0 multiplications / 42 fused
 * multiply-adds quoted in the generated operation-count comment.
 * NOTE(review): the meaning of &GENUS and of the four trailing zeros comes from
 * the kdft_desc declaration, which is not visible in this file -- confirm there.
 */
Chris@82 142 static const kdft_desc desc = { 7, "n1_7", {18, 0, 42, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 143
/*
 * Registration entry point for this codelet: passes the planner p, the n1_7
 * kernel and its descriptor to X(kdft_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * name-prefixing helper -- confirm), so the exported symbol name is not
 * literally "codelet_n1_7".
 */
Chris@82 144 void X(codelet_n1_7) (planner *p) {
Chris@82 145 X(kdft_register) (p, n1_7, &desc);
Chris@82 146 }
Chris@82 147
Chris@82 148 #else
Chris@82 149
Chris@82 150 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 7 -name n1_7 -include dft/scalar/n.h */
Chris@82 151
Chris@82 152 /*
Chris@82 153 * This function contains 60 FP additions, 36 FP multiplications,
Chris@82 154 * (or, 36 additions, 12 multiplications, 24 fused multiply/add),
Chris@82 155 * 25 stack variables, 6 constants, and 28 memory accesses
Chris@82 156 */
Chris@82 157 #include "dft/scalar/n.h"
Chris@82 158
/*
 * n1_7 (variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): size-7 transform kernel on split
 * real/imaginary arrays.
 *
 * ri, ii   real and imaginary input arrays (read only)
 * ro, io   real and imaginary output arrays (written)
 * is, os   input and output strides, used only through WS(is, k) / WS(os, k)
 * v        number of transforms to perform; the loop body runs v times
 * ivs, ovs amounts added to ri/ii and to ro/io after each transform
 *
 * Each pass reads elements 0 and WS(is, 1..6) of ri and ii and writes
 * elements 0 and WS(os, 1..6) of ro and io.
 *
 * NOTE(review): worked through by hand, the arithmetic matches the forward
 * 7-point DFT (exponent sign negative), assuming WS(s, k) addresses element k
 * at stride s, FMA(a, b, c) is a*b + c, FNMS(a, b, c) is c - a*b and
 * FNMA(a, b, c) is -(a*b) - c. Those macros are defined outside this file --
 * confirm against their definitions.
 */
Chris@82 159 static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 160 {
/*
 * Numerically: KP222520933 ~ cos(3 pi/7), KP900968867 ~ cos(pi/7),
 * KP623489801 ~ cos(2 pi/7), KP433883739 ~ sin(pi/7),
 * KP781831482 ~ sin(2 pi/7), KP974927912 ~ sin(3 pi/7).
 */
Chris@82 161 DK(KP222520933, +0.222520933956314404288902564496794759466355569);
Chris@82 162 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@82 163 DK(KP623489801, +0.623489801858733530525004884004239810632274731);
Chris@82 164 DK(KP433883739, +0.433883739117558120475768332848358754609990728);
Chris@82 165 DK(KP781831482, +0.781831482468029808708444526674057750232334519);
Chris@82 166 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@82 167 {
Chris@82 168 INT i;
/* One transform per iteration; all four pointers advance in the loop header. */
Chris@82 169 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
Chris@82 170 E T1, Tu, T4, Tq, Te, Tx, T7, Ts, Tk, Tv, Ta, Tr, Th, Tw;
/* Element 0 is used on its own: T1 is its real part, Tu its imaginary part. */
Chris@82 171 T1 = ri[0];
Chris@82 172 Tu = ii[0];
/*
 * Elements 1 and 6: sum and difference of the real parts (T4, Tq) and of the
 * imaginary parts (Tx, Te). Note the real difference is 6 minus 1 while the
 * imaginary difference is 1 minus 6.
 */
Chris@82 173 {
Chris@82 174 E T2, T3, Tc, Td;
Chris@82 175 T2 = ri[WS(is, 1)];
Chris@82 176 T3 = ri[WS(is, 6)];
Chris@82 177 T4 = T2 + T3;
Chris@82 178 Tq = T3 - T2;
Chris@82 179 Tc = ii[WS(is, 1)];
Chris@82 180 Td = ii[WS(is, 6)];
Chris@82 181 Te = Tc - Td;
Chris@82 182 Tx = Tc + Td;
Chris@82 183 }
/* Elements 2 and 5, combined the same way. */
Chris@82 184 {
Chris@82 185 E T5, T6, Ti, Tj;
Chris@82 186 T5 = ri[WS(is, 2)];
Chris@82 187 T6 = ri[WS(is, 5)];
Chris@82 188 T7 = T5 + T6;
Chris@82 189 Ts = T6 - T5;
Chris@82 190 Ti = ii[WS(is, 2)];
Chris@82 191 Tj = ii[WS(is, 5)];
Chris@82 192 Tk = Ti - Tj;
Chris@82 193 Tv = Ti + Tj;
Chris@82 194 }
/* Elements 3 and 4, combined the same way. */
Chris@82 195 {
Chris@82 196 E T8, T9, Tf, Tg;
Chris@82 197 T8 = ri[WS(is, 3)];
Chris@82 198 T9 = ri[WS(is, 4)];
Chris@82 199 Ta = T8 + T9;
Chris@82 200 Tr = T9 - T8;
Chris@82 201 Tf = ii[WS(is, 3)];
Chris@82 202 Tg = ii[WS(is, 4)];
Chris@82 203 Th = Tf - Tg;
Chris@82 204 Tw = Tf + Tg;
Chris@82 205 }
/* Output 0 is the plain sum of all seven inputs (real and imaginary separately). */
Chris@82 206 ro[0] = T1 + T4 + T7 + Ta;
Chris@82 207 io[0] = Tu + Tx + Tv + Tw;
/*
 * Outputs 2 and 5: each pair is formed from one term built on the pair sums
 * (Tb, TC) and one built on the pair differences (Tl, TB); the two outputs
 * differ only in the sign of the difference term.
 */
Chris@82 208 {
Chris@82 209 E Tl, Tb, TB, TC;
Chris@82 210 Tl = FNMS(KP781831482, Th, KP974927912 * Te) - (KP433883739 * Tk);
Chris@82 211 Tb = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4);
Chris@82 212 ro[WS(os, 5)] = Tb - Tl;
Chris@82 213 ro[WS(os, 2)] = Tb + Tl;
Chris@82 214 TB = FNMS(KP781831482, Tr, KP974927912 * Tq) - (KP433883739 * Ts);
Chris@82 215 TC = FMA(KP623489801, Tw, Tu) + FNMA(KP900968867, Tv, KP222520933 * Tx);
Chris@82 216 io[WS(os, 2)] = TB + TC;
Chris@82 217 io[WS(os, 5)] = TC - TB;
Chris@82 218 }
/* Outputs 1 and 6, same construction with the coefficients rotated. */
Chris@82 219 {
Chris@82 220 E Tn, Tm, Tz, TA;
Chris@82 221 Tn = FMA(KP781831482, Te, KP974927912 * Tk) + (KP433883739 * Th);
Chris@82 222 Tm = FMA(KP623489801, T4, T1) + FNMA(KP900968867, Ta, KP222520933 * T7);
Chris@82 223 ro[WS(os, 6)] = Tm - Tn;
Chris@82 224 ro[WS(os, 1)] = Tm + Tn;
Chris@82 225 Tz = FMA(KP781831482, Tq, KP974927912 * Ts) + (KP433883739 * Tr);
Chris@82 226 TA = FMA(KP623489801, Tx, Tu) + FNMA(KP900968867, Tw, KP222520933 * Tv);
Chris@82 227 io[WS(os, 1)] = Tz + TA;
Chris@82 228 io[WS(os, 6)] = TA - Tz;
Chris@82 229 }
/* Outputs 3 and 4. */
Chris@82 230 {
Chris@82 231 E Tp, To, Tt, Ty;
Chris@82 232 Tp = FMA(KP433883739, Te, KP974927912 * Th) - (KP781831482 * Tk);
Chris@82 233 To = FMA(KP623489801, T7, T1) + FNMA(KP222520933, Ta, KP900968867 * T4);
Chris@82 234 ro[WS(os, 4)] = To - Tp;
Chris@82 235 ro[WS(os, 3)] = To + Tp;
Chris@82 236 Tt = FMA(KP433883739, Tq, KP974927912 * Tr) - (KP781831482 * Ts);
Chris@82 237 Ty = FMA(KP623489801, Tv, Tu) + FNMA(KP222520933, Tw, KP900968867 * Tx);
Chris@82 238 io[WS(os, 3)] = Tt + Ty;
Chris@82 239 io[WS(os, 4)] = Ty - Tt;
Chris@82 240 }
Chris@82 241 }
Chris@82 242 }
Chris@82 243 }
Chris@82 244
/*
 * Descriptor handed to X(kdft_register) together with n1_7 in this non-FMA
 * branch. 7 and "n1_7" match the -n and -name arguments on the generator
 * command line, and {36, 12, 24, 0} repeats the 36 additions / 12
 * multiplications / 24 fused multiply-adds quoted in the generated
 * operation-count comment.
 * NOTE(review): the meaning of &GENUS and of the four trailing zeros comes from
 * the kdft_desc declaration, which is not visible in this file -- confirm there.
 */
Chris@82 245 static const kdft_desc desc = { 7, "n1_7", {36, 12, 24, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 246
/*
 * Registration entry point for this codelet: passes the planner p, the n1_7
 * kernel and its descriptor to X(kdft_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * name-prefixing helper -- confirm), so the exported symbol name is not
 * literally "codelet_n1_7".
 */
Chris@82 247 void X(codelet_n1_7) (planner *p) {
Chris@82 248 X(kdft_register) (p, n1_7, &desc);
Chris@82 249 }
Chris@82 250
Chris@82 251 #endif