annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cbIII_9.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:43 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -name r2cbIII_9 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 32 FP additions, 24 FP multiplications,
Chris@82 32 * (or, 8 additions, 0 multiplications, 24 fused multiply/add),
Chris@82 33 * 35 stack variables, 12 constants, and 18 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cbIII.h"
Chris@82 36
/*
 * r2cbIII_9, FMA variant (this branch is compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).  Straight-line kernel emitted by
 * genfft; the generator command line recorded earlier in this branch asks
 * gen_r2cb for -n 9 with -dft-III.
 *
 * Per loop iteration the body reads nine inputs and writes nine outputs:
 *   reads : Cr[0], Cr[WS(csr, 1..4)], Ci[0], Ci[WS(csi, 1..3)]
 *   writes: R0[0], R0[WS(rs, 1..4)], R1[0], R1[WS(rs, 1..3)]
 *
 * Parameters:
 *   R0, R1  - output arrays, advanced by ovs after every iteration
 *   Cr, Ci  - input arrays, advanced by ivs after every iteration
 *   rs      - stride handed to WS() when indexing R0 and R1
 *   csr     - stride handed to WS() when indexing Cr
 *   csi     - stride handed to WS() when indexing Ci
 *   v       - number of loop iterations to run
 *   ivs     - per-iteration increment applied to Cr and Ci
 *   ovs     - per-iteration increment applied to R0 and R1
 *
 * R, E, INT, stride, DK, WS, FMA, FMS, FNMS and MAKE_VOLATILE_STRIDE are not
 * defined in this file; they come from the included project headers.
 * NOTE(review): in FFTW, FMA(a, b, c) is a*b + c, FMS(a, b, c) is a*b - c and
 * FNMS(a, b, c) is c - a*b -- confirm against the headers before relying on
 * any formula written in the comments below.
 */
Chris@82 37 static void r2cbIII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants; each name spells out the leading digits of its value
 * (e.g. KP866025403 is 0.866025403..., KP1_732050807 is 1.732050807...). */
Chris@82 39 DK(KP1_705737063, +1.705737063904886419256501927880148143872040591);
Chris@82 40 DK(KP1_969615506, +1.969615506024416118733486049179046027341286503);
Chris@82 41 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 42 DK(KP176326980, +0.176326980708464973471090386868618986121633062);
Chris@82 43 DK(KP1_326827896, +1.326827896337876792410842639271782594433726619);
Chris@82 44 DK(KP1_532088886, +1.532088886237956070404785301110833347871664914);
Chris@82 45 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@82 46 DK(KP839099631, +0.839099631177280011763127298123181364687434283);
Chris@82 47 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 48 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 49 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 50 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 51 {
Chris@82 52 INT i;
/* i counts down from v to 1.  The increment clause moves the output pointers
 * by ovs and the input pointers by ivs, then invokes MAKE_VOLATILE_STRIDE on
 * each of the three strides (purpose of that macro is not visible here). */
Chris@82 53 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) {
/* Temporaries that outlive the nested scopes in which they are assigned. */
Chris@82 54 E T3, Tr, Th, Td, Tc, T8, Tn, Ts, Tk, Tt, T9, Te;
/* Stage 1: fold Cr[WS(csr, 1)], Cr[WS(csr, 4)] and Ci[WS(csi, 1)] into
 * T3, Tr and Th. */
Chris@82 55 {
Chris@82 56 E Tg, T1, T2, Tf;
Chris@82 57 Tg = Ci[WS(csi, 1)];
Chris@82 58 T1 = Cr[WS(csr, 4)];
Chris@82 59 T2 = Cr[WS(csr, 1)];
Chris@82 60 Tf = T2 - T1;
Chris@82 61 T3 = FMA(KP2_000000000, T2, T1);
Chris@82 62 Tr = FMA(KP1_732050807, Tg, Tf);
Chris@82 63 Th = FNMS(KP1_732050807, Tg, Tf);
Chris@82 64 }
/* Stage 2: fold the remaining six inputs (Cr[0], Cr[WS(csr, 2)],
 * Cr[WS(csr, 3)], Ci[0], Ci[WS(csi, 2)], Ci[WS(csi, 3)]) into
 * Tc, Td, T8, Tn, Ts, Tk and Tt. */
Chris@82 65 {
Chris@82 66 E T4, T7, Tm, Tj, Tl, Ti;
Chris@82 67 T4 = Cr[WS(csr, 3)];
Chris@82 68 Td = Ci[WS(csi, 3)];
Chris@82 69 {
Chris@82 70 E T5, T6, Ta, Tb;
Chris@82 71 T5 = Cr[0];
Chris@82 72 T6 = Cr[WS(csr, 2)];
/* Sum and difference of the two Cr values. */
Chris@82 73 T7 = T5 + T6;
Chris@82 74 Tm = T5 - T6;
Chris@82 75 Ta = Ci[WS(csi, 2)];
Chris@82 76 Tb = Ci[0];
/* Difference and sum of the two Ci values. */
Chris@82 77 Tc = Ta - Tb;
Chris@82 78 Tj = Tb + Ta;
Chris@82 79 }
Chris@82 80 T8 = T4 + T7;
/* Tn/Ts and Tk/Tt are built as pairs: same operands, FNMS in one and FMA in
 * the other, both using the KP866025403 factor. */
Chris@82 81 Tl = FMA(KP500000000, Tc, Td);
Chris@82 82 Tn = FNMS(KP866025403, Tm, Tl);
Chris@82 83 Ts = FMA(KP866025403, Tm, Tl);
Chris@82 84 Ti = FNMS(KP500000000, T7, T4);
Chris@82 85 Tk = FMA(KP866025403, Tj, Ti);
Chris@82 86 Tt = FNMS(KP866025403, Tj, Ti);
Chris@82 87 }
/* First three outputs, derived from T3, T8, Tc and Td only. */
Chris@82 88 R0[0] = FMA(KP2_000000000, T8, T3);
Chris@82 89 T9 = T8 - T3;
Chris@82 90 Te = Tc - Td;
Chris@82 91 R1[WS(rs, 1)] = FMA(KP1_732050807, Te, T9);
Chris@82 92 R0[WS(rs, 3)] = FMS(KP1_732050807, Te, T9);
/* Remaining six outputs: three from (Tk, Tn, Th), three from (Ts, Tt, Tr). */
Chris@82 93 {
Chris@82 94 E Tq, To, Tp, Tw, Tu, Tv;
Chris@82 95 Tq = FNMS(KP839099631, Tk, Tn);
Chris@82 96 To = FMA(KP839099631, Tn, Tk);
Chris@82 97 Tp = FMA(KP766044443, To, Th);
Chris@82 98 R1[0] = FNMS(KP1_532088886, To, Th);
Chris@82 99 R1[WS(rs, 3)] = FMA(KP1_326827896, Tq, Tp);
Chris@82 100 R0[WS(rs, 2)] = FMS(KP1_326827896, Tq, Tp);
Chris@82 101 Tw = FNMS(KP176326980, Ts, Tt);
Chris@82 102 Tu = FMA(KP176326980, Tt, Ts);
Chris@82 103 Tv = FMA(KP984807753, Tu, Tr);
Chris@82 104 R0[WS(rs, 1)] = FMS(KP1_969615506, Tu, Tr);
Chris@82 105 R1[WS(rs, 2)] = FMA(KP1_705737063, Tw, Tv);
Chris@82 106 R0[WS(rs, 4)] = FMS(KP1_705737063, Tw, Tv);
Chris@82 107 }
Chris@82 108 }
Chris@82 109 }
Chris@82 110 }
Chris@82 111
/*
 * Descriptor handed to X(kr2c_register) together with the FMA-variant
 * r2cbIII_9: the integer 9, the name string "r2cbIII_9", four counts and
 * &GENUS.  The first three counts (8, 0, 24) equal the "8 additions,
 * 0 multiplications, 24 fused multiply/add" figures in the generator comment
 * earlier in this branch.
 * NOTE(review): the field layout of kr2c_desc and the definition of GENUS live
 * in project headers not shown here -- confirm the meaning of each field there.
 */
Chris@82 112 static const kr2c_desc desc = { 9, "r2cbIII_9", {8, 0, 24, 0}, &GENUS };
Chris@82 113
/*
 * Registration entry point for the FMA variant: passes the planner p, the
 * static codelet function r2cbIII_9 and the address of desc to
 * X(kr2c_register).
 * NOTE(review): X() is a macro from the project headers (presumably a
 * name-prefixing macro) -- confirm.
 */
Chris@82 114 void X(codelet_r2cbIII_9) (planner *p) {
Chris@82 115 X(kr2c_register) (p, r2cbIII_9, &desc);
Chris@82 116 }
Chris@82 117
Chris@82 118 #else
Chris@82 119
Chris@82 120 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 9 -name r2cbIII_9 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 121
Chris@82 122 /*
Chris@82 123 * This function contains 32 FP additions, 18 FP multiplications,
Chris@82 124 * (or, 22 additions, 8 multiplications, 10 fused multiply/add),
Chris@82 125 * 35 stack variables, 12 constants, and 18 memory accesses
Chris@82 126 */
Chris@82 127 #include "rdft/scalar/r2cbIII.h"
Chris@82 128
/*
 * r2cbIII_9, non-FMA variant (this is the #else branch, compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).  Straight-line
 * kernel emitted by genfft; the generator command line recorded earlier in
 * this branch asks gen_r2cb for -n 9 with -dft-III (no -fma flag).
 *
 * Per loop iteration the body reads nine inputs and writes nine outputs:
 *   reads : Cr[0], Cr[WS(csr, 1..4)], Ci[0], Ci[WS(csi, 1..3)]
 *   writes: R0[0], R0[WS(rs, 1..4)], R1[0], R1[WS(rs, 1..3)]
 *
 * Parameters:
 *   R0, R1  - output arrays, advanced by ovs after every iteration
 *   Cr, Ci  - input arrays, advanced by ivs after every iteration
 *   rs      - stride handed to WS() when indexing R0 and R1
 *   csr     - stride handed to WS() when indexing Cr
 *   csi     - stride handed to WS() when indexing Ci
 *   v       - number of loop iterations to run
 *   ivs     - per-iteration increment applied to Cr and Ci
 *   ovs     - per-iteration increment applied to R0 and R1
 *
 * R, E, INT, stride, DK, WS, FMA, FMS, FNMS and MAKE_VOLATILE_STRIDE are not
 * defined in this file; they come from the included project headers.
 * NOTE(review): in FFTW, FMA(a, b, c) is a*b + c, FMS(a, b, c) is a*b - c and
 * FNMS(a, b, c) is c - a*b -- confirm against the headers.
 */
Chris@82 129 static void r2cbIII_9(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 130 {
/* Numeric constants; each name spells out the leading digits of its value
 * (e.g. KP866025403 is 0.866025403..., KP1_732050807 is 1.732050807...). */
Chris@82 131 DK(KP642787609, +0.642787609686539326322643409907263432907559884);
Chris@82 132 DK(KP766044443, +0.766044443118978035202392650555416673935832457);
Chris@82 133 DK(KP1_326827896, +1.326827896337876792410842639271782594433726619);
Chris@82 134 DK(KP1_113340798, +1.113340798452838732905825904094046265936583811);
Chris@82 135 DK(KP984807753, +0.984807753012208059366743024589523013670643252);
Chris@82 136 DK(KP173648177, +0.173648177666930348851716626769314796000375677);
Chris@82 137 DK(KP1_705737063, +1.705737063904886419256501927880148143872040591);
Chris@82 138 DK(KP300767466, +0.300767466360870593278543795225003852144476517);
Chris@82 139 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 140 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 141 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 142 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 143 {
Chris@82 144 INT i;
/* i counts down from v to 1.  The increment clause moves the output pointers
 * by ovs and the input pointers by ivs, then invokes MAKE_VOLATILE_STRIDE on
 * each of the three strides (purpose of that macro is not visible here). */
Chris@82 145 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(36, rs), MAKE_VOLATILE_STRIDE(36, csr), MAKE_VOLATILE_STRIDE(36, csi)) {
/* Temporaries that outlive the nested scopes in which they are assigned. */
Chris@82 146 E T3, Ts, Ti, Td, Tc, T8, To, Tu, Tl, Tt, T9, Te;
/* Stage 1: fold Cr[WS(csr, 1)], Cr[WS(csr, 4)] and Ci[WS(csi, 1)] into
 * T3, Ts and Ti. */
Chris@82 147 {
Chris@82 148 E Th, T1, T2, Tf, Tg;
Chris@82 149 Tg = Ci[WS(csi, 1)];
/* Scale once, then reuse the product in both Ts and Ti below. */
Chris@82 150 Th = KP1_732050807 * Tg;
Chris@82 151 T1 = Cr[WS(csr, 4)];
Chris@82 152 T2 = Cr[WS(csr, 1)];
Chris@82 153 Tf = T2 - T1;
Chris@82 154 T3 = FMA(KP2_000000000, T2, T1);
Chris@82 155 Ts = Tf - Th;
Chris@82 156 Ti = Tf + Th;
Chris@82 157 }
/* Stage 2: fold the remaining six inputs (Cr[0], Cr[WS(csr, 2)],
 * Cr[WS(csr, 3)], Ci[0], Ci[WS(csi, 2)], Ci[WS(csi, 3)]) into
 * Tc, Td, T8, To, Tu, Tl and Tt. */
Chris@82 158 {
Chris@82 159 E T4, T7, Tm, Tk, Tn, Tj;
Chris@82 160 T4 = Cr[WS(csr, 3)];
Chris@82 161 Td = Ci[WS(csi, 3)];
Chris@82 162 {
Chris@82 163 E T5, T6, Ta, Tb;
Chris@82 164 T5 = Cr[0];
Chris@82 165 T6 = Cr[WS(csr, 2)];
Chris@82 166 T7 = T5 + T6;
/* Scaled difference of the two Cr values (note the order: T6 - T5). */
Chris@82 167 Tm = KP866025403 * (T6 - T5);
Chris@82 168 Ta = Ci[WS(csi, 2)];
Chris@82 169 Tb = Ci[0];
Chris@82 170 Tc = Ta - Tb;
/* Scaled sum of the two Ci values. */
Chris@82 171 Tk = KP866025403 * (Tb + Ta);
Chris@82 172 }
Chris@82 173 T8 = T4 + T7;
/* To/Tu and Tl/Tt are difference/sum pairs over the same two operands. */
Chris@82 174 Tn = FMA(KP500000000, Tc, Td);
Chris@82 175 To = Tm - Tn;
Chris@82 176 Tu = Tm + Tn;
Chris@82 177 Tj = FMS(KP500000000, T7, T4);
Chris@82 178 Tl = Tj + Tk;
Chris@82 179 Tt = Tj - Tk;
Chris@82 180 }
/* First three outputs, derived from T3, T8, Tc and Td only. */
Chris@82 181 R0[0] = FMA(KP2_000000000, T8, T3);
Chris@82 182 T9 = T8 - T3;
Chris@82 183 Te = KP1_732050807 * (Tc - Td);
Chris@82 184 R1[WS(rs, 1)] = T9 + Te;
Chris@82 185 R0[WS(rs, 3)] = Te - T9;
/* Remaining six outputs: three from (Tl, To, Ti), three from (Tt, Tu, Ts). */
Chris@82 186 {
Chris@82 187 E Tr, Tp, Tq, Tx, Tv, Tw;
Chris@82 188 Tr = FNMS(KP1_705737063, Tl, KP300767466 * To);
Chris@82 189 Tp = FMA(KP173648177, Tl, KP984807753 * To);
Chris@82 190 Tq = Ti - Tp;
/* This output is stored with an explicit negation of the FMA result. */
Chris@82 191 R0[WS(rs, 1)] = -(FMA(KP2_000000000, Tp, Ti));
Chris@82 192 R0[WS(rs, 4)] = Tr - Tq;
Chris@82 193 R1[WS(rs, 2)] = Tq + Tr;
Chris@82 194 Tx = FMA(KP1_113340798, Tt, KP1_326827896 * Tu);
Chris@82 195 Tv = FNMS(KP642787609, Tu, KP766044443 * Tt);
Chris@82 196 Tw = Tv - Ts;
Chris@82 197 R1[0] = FMA(KP2_000000000, Tv, Ts);
Chris@82 198 R1[WS(rs, 3)] = Tx - Tw;
Chris@82 199 R0[WS(rs, 2)] = Tw + Tx;
Chris@82 200 }
Chris@82 201 }
Chris@82 202 }
Chris@82 203 }
Chris@82 204
/*
 * Descriptor handed to X(kr2c_register) together with the non-FMA
 * r2cbIII_9: the integer 9, the name string "r2cbIII_9", four counts and
 * &GENUS.  The first three counts (22, 8, 10) equal the "22 additions,
 * 8 multiplications, 10 fused multiply/add" figures in the generator comment
 * earlier in this branch.
 * NOTE(review): the field layout of kr2c_desc and the definition of GENUS live
 * in project headers not shown here -- confirm the meaning of each field there.
 */
Chris@82 205 static const kr2c_desc desc = { 9, "r2cbIII_9", {22, 8, 10, 0}, &GENUS };
Chris@82 206
/*
 * Registration entry point for the non-FMA variant: passes the planner p, the
 * static codelet function r2cbIII_9 and the address of desc to
 * X(kr2c_register).
 * NOTE(review): X() is a macro from the project headers (presumably a
 * name-prefixing macro) -- confirm.
 */
Chris@82 207 void X(codelet_r2cbIII_9) (planner *p) {
Chris@82 208 X(kr2c_register) (p, r2cbIII_9, &desc);
Chris@82 209 }
Chris@82 210
Chris@82 211 #endif