annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_11.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:26 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 11 -name r2cf_11 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 60 FP additions, 50 FP multiplications,
Chris@82 32 * (or, 15 additions, 5 multiplications, 45 fused multiply/add),
Chris@82 33 * 42 stack variables, 10 constants, and 22 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
/*
 * r2cf_11, FMA-oriented variant (this branch is compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Runs `v` iterations.  Each iteration reads 11 inputs -- R0[0],
 * R0[WS(rs, 1..5)] and R1[WS(rs, 0..4)] -- and stores 11 outputs:
 * Cr[0], Cr[WS(csr, 1..5)] and Ci[WS(csi, 1..5)].  Ci[0] is never
 * written.  After each iteration R0 and R1 advance by ivs, and Cr and
 * Ci advance by ovs.
 *
 * Structure of one iteration: R0[0] is kept on its own (T1); the other
 * ten inputs are combined pairwise into five sums (T4, T7, Ta, Td, Tg)
 * and five differences (TB, TC, TD, TE, TF).  Cr outputs are built from
 * T1 and the sums only; Ci outputs are built from the differences only.
 *
 * NOTE(review): the function name and the generator's "-n 11" indicate a
 * size-11 real-to-complex forward transform with Cr/Ci as the real and
 * imaginary outputs -- confirm against rdft/scalar/r2cf.h.
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are all
 * defined outside this file; FMA(a, b, c) / FNMS(a, b, c) are presumably
 * a*b + c and c - a*b -- TODO confirm in the codelet headers.
 */
Chris@82 37 static void r2cf_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DK(KP918985947, +0.918985947228994779780736114132655398124909697); /* ten generator-chosen constants for the nested multiply-add chains below */
Chris@82 40 DK(KP989821441, +0.989821441880932732376092037776718787376519372); /* common outer factor of every Ci store */
Chris@82 41 DK(KP830830026, +0.830830026003772851058548298459246407048009821);
Chris@82 42 DK(KP715370323, +0.715370323453429719112414662767260662417897278);
Chris@82 43 DK(KP959492973, +0.959492973614497389890368057066327699062454848); /* common outer factor of Cr[1..5] */
Chris@82 44 DK(KP876768831, +0.876768831002589333891339807079336796764054852);
Chris@82 45 DK(KP778434453, +0.778434453334651800608337670740821884709317477);
Chris@82 46 DK(KP634356270, +0.634356270682424498893150776899916060542806975);
Chris@82 47 DK(KP342584725, +0.342584725681637509502641509861112333758894680);
Chris@82 48 DK(KP521108558, +0.521108558113202722944698153526659300680427422);
Chris@82 49 {
Chris@82 50 INT i; /* counts down from v; all pointer bumps happen in the for-increment */
Chris@82 51 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) {
Chris@82 52 E T1, T4, TC, Tg, TE, T7, TD, Ta, TF, Td, TB, TG, TM, TS, TJ;
Chris@82 53 E TP, Ty, Tq, Ti, Tu, Tm, T5, T6;
Chris@82 54 T1 = R0[0]; /* the one input that is not paired with another */
Chris@82 55 {
Chris@82 56 E T2, T3, Te, Tf;
Chris@82 57 T2 = R1[0];
Chris@82 58 T3 = R0[WS(rs, 5)];
Chris@82 59 T4 = T2 + T3; /* pair sum */
Chris@82 60 TC = T3 - T2; /* pair difference (second load minus first) */
Chris@82 61 Te = R1[WS(rs, 2)];
Chris@82 62 Tf = R0[WS(rs, 3)];
Chris@82 63 Tg = Te + Tf;
Chris@82 64 TE = Tf - Te;
Chris@82 65 }
Chris@82 66 T5 = R0[WS(rs, 1)];
Chris@82 67 T6 = R1[WS(rs, 4)];
Chris@82 68 T7 = T5 + T6;
Chris@82 69 TD = T5 - T6; /* note: first load minus second here, unlike TC/TE/TF */
Chris@82 70 {
Chris@82 71 E T8, T9, Tb, Tc;
Chris@82 72 T8 = R1[WS(rs, 1)];
Chris@82 73 T9 = R0[WS(rs, 4)];
Chris@82 74 Ta = T8 + T9;
Chris@82 75 TF = T9 - T8;
Chris@82 76 Tb = R0[WS(rs, 2)];
Chris@82 77 Tc = R1[WS(rs, 3)];
Chris@82 78 Td = Tb + Tc;
Chris@82 79 TB = Tb - Tc; /* first load minus second, same orientation as TD */
Chris@82 80 }
Chris@82 81 TG = FMA(KP521108558, TF, TE); /* innermost level of the five Ci chains: pairs of differences mixed with KP521108558 */
Chris@82 82 TM = FNMS(KP521108558, TD, TB);
Chris@82 83 TS = FMA(KP521108558, TC, TD);
Chris@82 84 TJ = FMA(KP521108558, TE, TC);
Chris@82 85 TP = FNMS(KP521108558, TB, TF);
Chris@82 86 {
Chris@82 87 E Tx, Tp, Th, Tt, Tl; /* innermost two levels of the five Cr chains, built from the pair sums only */
Chris@82 88 Tx = FNMS(KP342584725, Ta, T7);
Chris@82 89 Ty = FNMS(KP634356270, Tx, Td);
Chris@82 90 Tp = FNMS(KP342584725, T4, Ta);
Chris@82 91 Tq = FNMS(KP634356270, Tp, Tg);
Chris@82 92 Th = FNMS(KP342584725, Tg, Td);
Chris@82 93 Ti = FNMS(KP634356270, Th, Ta);
Chris@82 94 Tt = FNMS(KP342584725, Td, T4);
Chris@82 95 Tu = FNMS(KP634356270, Tt, T7);
Chris@82 96 Tl = FNMS(KP342584725, T7, Tg);
Chris@82 97 Tm = FNMS(KP634356270, Tl, T4);
Chris@82 98 }
Chris@82 99 {
Chris@82 100 E To, Tn, TI, TH; /* from here on each chain is completed and stored; Cr and Ci stores are interleaved */
Chris@82 101 {
Chris@82 102 E Tk, Tj, TU, TT;
Chris@82 103 Tj = FNMS(KP778434453, Ti, T7);
Chris@82 104 Tk = FNMS(KP876768831, Tj, T4);
Chris@82 105 Cr[WS(csr, 5)] = FNMS(KP959492973, Tk, T1); /* every Cr[1..5] store has this shape: FNMS(KP959492973, chain, T1) */
Chris@82 106 TT = FMA(KP715370323, TS, TF);
Chris@82 107 TU = FMA(KP830830026, TT, TB);
Chris@82 108 Ci[WS(csi, 5)] = KP989821441 * (FMA(KP918985947, TU, TE)); /* every Ci store has this shape: KP989821441 * (chain over differences) */
Chris@82 109 }
Chris@82 110 Tn = FNMS(KP778434453, Tm, Ta);
Chris@82 111 To = FNMS(KP876768831, Tn, Td);
Chris@82 112 Cr[WS(csr, 4)] = FNMS(KP959492973, To, T1);
Chris@82 113 {
Chris@82 114 E TR, TQ, Ts, Tr;
Chris@82 115 TQ = FMA(KP715370323, TP, TC);
Chris@82 116 TR = FNMS(KP830830026, TQ, TE);
Chris@82 117 Ci[WS(csi, 4)] = KP989821441 * (FNMS(KP918985947, TR, TD));
Chris@82 118 Tr = FNMS(KP778434453, Tq, Td);
Chris@82 119 Ts = FNMS(KP876768831, Tr, T7);
Chris@82 120 Cr[WS(csr, 3)] = FNMS(KP959492973, Ts, T1);
Chris@82 121 }
Chris@82 122 {
Chris@82 123 E TO, TN, Tw, Tv;
Chris@82 124 TN = FNMS(KP715370323, TM, TE);
Chris@82 125 TO = FNMS(KP830830026, TN, TF);
Chris@82 126 Ci[WS(csi, 3)] = KP989821441 * (FNMS(KP918985947, TO, TC));
Chris@82 127 Tv = FNMS(KP778434453, Tu, Tg);
Chris@82 128 Tw = FNMS(KP876768831, Tv, Ta);
Chris@82 129 Cr[WS(csr, 2)] = FNMS(KP959492973, Tw, T1);
Chris@82 130 Cr[0] = T1 + T4 + T7 + Ta + Td + Tg; /* plain sum of all 11 inputs */
Chris@82 131 }
Chris@82 132 TH = FMA(KP715370323, TG, TD);
Chris@82 133 TI = FNMS(KP830830026, TH, TC);
Chris@82 134 Ci[WS(csi, 2)] = KP989821441 * (FMA(KP918985947, TI, TB));
Chris@82 135 {
Chris@82 136 E TL, TK, TA, Tz;
Chris@82 137 TK = FNMS(KP715370323, TJ, TB);
Chris@82 138 TL = FMA(KP830830026, TK, TD);
Chris@82 139 Ci[WS(csi, 1)] = KP989821441 * (FNMS(KP918985947, TL, TF));
Chris@82 140 Tz = FNMS(KP778434453, Ty, T4);
Chris@82 141 TA = FNMS(KP876768831, Tz, Tg);
Chris@82 142 Cr[WS(csr, 1)] = FNMS(KP959492973, TA, T1);
Chris@82 143 }
Chris@82 144 }
Chris@82 145 }
Chris@82 146 }
Chris@82 147 }
Chris@82 148
/*
 * Descriptor passed to the planner together with r2cf_11 (FMA variant).
 * 11 matches the generator's "-n 11"; the string is the codelet name; the
 * first three counts {15, 5, 45} match the "15 additions, 5 multiplications,
 * 45 fused multiply/add" figures in the generator comment for this variant.
 * NOTE(review): the meaning of the fourth count (0) and of GENUS is not
 * visible here -- see the kr2c_desc declaration and rdft/scalar/r2cf.h.
 */
Chris@82 149 static const kr2c_desc desc = { 11, "r2cf_11", {15, 5, 45, 0}, &GENUS };
Chris@82 150
/*
 * Registration entry point for the FMA variant: hands the static r2cf_11
 * function and its descriptor to planner `p` through X(kr2c_register).
 * X() is a macro defined outside this file (presumably the library's
 * symbol-name wrapper -- TODO confirm).
 */
Chris@82 151 void X(codelet_r2cf_11) (planner *p) {
Chris@82 152 X(kr2c_register) (p, r2cf_11, &desc);
Chris@82 153 }
Chris@82 154
Chris@82 155 #else
Chris@82 156
Chris@82 157 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 11 -name r2cf_11 -include rdft/scalar/r2cf.h */
Chris@82 158
Chris@82 159 /*
Chris@82 160 * This function contains 60 FP additions, 50 FP multiplications,
Chris@82 161 * (or, 20 additions, 10 multiplications, 40 fused multiply/add),
Chris@82 162 * 28 stack variables, 10 constants, and 22 memory accesses
Chris@82 163 */
Chris@82 164 #include "rdft/scalar/r2cf.h"
Chris@82 165
/*
 * r2cf_11, non-FMA variant (the #else branch, compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Runs `v` iterations.  Each iteration reads 11 inputs -- R0[0],
 * R0[WS(rs, 1..5)] and R1[WS(rs, 0..4)] -- and stores 11 outputs:
 * Cr[0], Cr[WS(csr, 1..5)] and Ci[WS(csi, 1..5)].  Ci[0] is never
 * written.  After each iteration R0 and R1 advance by ivs, and Cr and
 * Ci advance by ovs.
 *
 * Structure of one iteration: R0[0] is kept on its own (T1); the other
 * ten inputs are combined pairwise into five sums (T4, T7, Ta, Td, Tg)
 * and five differences (Th, Ti, Tj, Tk, Tl; always second load minus
 * first).  Each Cr[1..5] is a single expression in T1 and the five sums;
 * each Ci[1..5] is a single expression in the five differences.
 *
 * The ten constants numerically match |cos(2*pi*k/11)| (KP841253532,
 * KP415415013, KP142314838, KP654860733, KP959492973 for k = 1..5) and
 * sin(2*pi*k/11) (KP540640817, KP909631995, KP989821441, KP755749574,
 * KP281732556 for k = 1..5); signs are carried by the choice of
 * FMA / FNMS / FNMA and the explicit +/- operators.
 *
 * NOTE(review): the function name and the generator's "-n 11" indicate a
 * size-11 real-to-complex forward transform with Cr/Ci as the real and
 * imaginary outputs -- confirm against rdft/scalar/r2cf.h.
 * R, E, INT, stride, DK, WS, FMA, FNMS, FNMA and MAKE_VOLATILE_STRIDE are
 * all defined outside this file; FMA(a, b, c), FNMS(a, b, c) and
 * FNMA(a, b, c) are presumably a*b + c, c - a*b and -(a*b) - c --
 * TODO confirm in the codelet headers.
 */
Chris@82 166 static void r2cf_11(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 167 {
Chris@82 168 DK(KP654860733, +0.654860733945285064056925072466293553183791199);
Chris@82 169 DK(KP142314838, +0.142314838273285140443792668616369668791051361);
Chris@82 170 DK(KP959492973, +0.959492973614497389890368057066327699062454848);
Chris@82 171 DK(KP415415013, +0.415415013001886425529274149229623203524004910);
Chris@82 172 DK(KP841253532, +0.841253532831181168861811648919367717513292498);
Chris@82 173 DK(KP989821441, +0.989821441880932732376092037776718787376519372);
Chris@82 174 DK(KP909631995, +0.909631995354518371411715383079028460060241051);
Chris@82 175 DK(KP281732556, +0.281732556841429697711417915346616899035777899);
Chris@82 176 DK(KP540640817, +0.540640817455597582107635954318691695431770608);
Chris@82 177 DK(KP755749574, +0.755749574354258283774035843972344420179717445);
Chris@82 178 {
Chris@82 179 INT i; /* counts down from v; all pointer bumps happen in the for-increment */
Chris@82 180 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(44, rs), MAKE_VOLATILE_STRIDE(44, csr), MAKE_VOLATILE_STRIDE(44, csi)) {
Chris@82 181 E T1, T4, Tl, Tg, Th, Td, Ti, Ta, Tk, T7, Tj, Tb, Tc;
Chris@82 182 T1 = R0[0]; /* the one input that is not paired with another */
Chris@82 183 {
Chris@82 184 E T2, T3, Te, Tf;
Chris@82 185 T2 = R0[WS(rs, 1)];
Chris@82 186 T3 = R1[WS(rs, 4)];
Chris@82 187 T4 = T2 + T3; /* pair sum */
Chris@82 188 Tl = T3 - T2; /* pair difference (second load minus first) */
Chris@82 189 Te = R1[0];
Chris@82 190 Tf = R0[WS(rs, 5)];
Chris@82 191 Tg = Te + Tf;
Chris@82 192 Th = Tf - Te;
Chris@82 193 }
Chris@82 194 Tb = R1[WS(rs, 1)];
Chris@82 195 Tc = R0[WS(rs, 4)];
Chris@82 196 Td = Tb + Tc;
Chris@82 197 Ti = Tc - Tb;
Chris@82 198 {
Chris@82 199 E T8, T9, T5, T6;
Chris@82 200 T8 = R1[WS(rs, 2)];
Chris@82 201 T9 = R0[WS(rs, 3)];
Chris@82 202 Ta = T8 + T9;
Chris@82 203 Tk = T9 - T8;
Chris@82 204 T5 = R0[WS(rs, 2)];
Chris@82 205 T6 = R1[WS(rs, 3)];
Chris@82 206 T7 = T5 + T6;
Chris@82 207 Tj = T6 - T5;
Chris@82 208 }
Chris@82 209 Ci[WS(csi, 4)] = FMA(KP755749574, Th, KP540640817 * Ti) + FNMS(KP909631995, Tk, KP281732556 * Tj) - (KP989821441 * Tl); /* Ci stores use only the five differences */
Chris@82 210 Cr[WS(csr, 4)] = FMA(KP841253532, Td, T1) + FNMS(KP959492973, T7, KP415415013 * Ta) + FNMA(KP142314838, T4, KP654860733 * Tg); /* Cr stores use only T1 and the five sums */
Chris@82 211 Ci[WS(csi, 2)] = FMA(KP909631995, Th, KP755749574 * Tl) + FNMA(KP540640817, Tk, KP989821441 * Tj) - (KP281732556 * Ti);
Chris@82 212 Ci[WS(csi, 5)] = FMA(KP281732556, Th, KP755749574 * Ti) + FNMS(KP909631995, Tj, KP989821441 * Tk) - (KP540640817 * Tl);
Chris@82 213 Ci[WS(csi, 1)] = FMA(KP540640817, Th, KP909631995 * Tl) + FMA(KP989821441, Ti, KP755749574 * Tj) + (KP281732556 * Tk);
Chris@82 214 Ci[WS(csi, 3)] = FMA(KP989821441, Th, KP540640817 * Tj) + FNMS(KP909631995, Ti, KP755749574 * Tk) - (KP281732556 * Tl);
Chris@82 215 Cr[WS(csr, 3)] = FMA(KP415415013, Td, T1) + FNMS(KP654860733, Ta, KP841253532 * T7) + FNMA(KP959492973, T4, KP142314838 * Tg);
Chris@82 216 Cr[WS(csr, 1)] = FMA(KP841253532, Tg, T1) + FNMS(KP959492973, Ta, KP415415013 * T4) + FNMA(KP654860733, T7, KP142314838 * Td);
Chris@82 217 Cr[0] = T1 + Tg + T4 + Td + T7 + Ta; /* plain sum of all 11 inputs */
Chris@82 218 Cr[WS(csr, 2)] = FMA(KP415415013, Tg, T1) + FNMS(KP142314838, T7, KP841253532 * Ta) + FNMA(KP959492973, Td, KP654860733 * T4);
Chris@82 219 Cr[WS(csr, 5)] = FMA(KP841253532, T4, T1) + FNMS(KP142314838, Ta, KP415415013 * T7) + FNMA(KP654860733, Td, KP959492973 * Tg);
Chris@82 220 }
Chris@82 221 }
Chris@82 222 }
Chris@82 223
/*
 * Descriptor passed to the planner together with r2cf_11 (non-FMA variant).
 * 11 matches the generator's "-n 11"; the string is the codelet name; the
 * first three counts {20, 10, 40} match the "20 additions, 10
 * multiplications, 40 fused multiply/add" figures in the generator comment
 * for this variant.
 * NOTE(review): the meaning of the fourth count (0) and of GENUS is not
 * visible here -- see the kr2c_desc declaration and rdft/scalar/r2cf.h.
 */
Chris@82 224 static const kr2c_desc desc = { 11, "r2cf_11", {20, 10, 40, 0}, &GENUS };
Chris@82 225
/*
 * Registration entry point for the non-FMA variant: hands the static
 * r2cf_11 function and its descriptor to planner `p` through
 * X(kr2c_register).  X() is a macro defined outside this file (presumably
 * the library's symbol-name wrapper -- TODO confirm).
 */
Chris@82 226 void X(codelet_r2cf_11) (planner *p) {
Chris@82 227 X(kr2c_register) (p, r2cf_11, &desc);
Chris@82 228 }
Chris@82 229
Chris@82 230 #endif