annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cbIII_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:44 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cbIII_15 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 64 FP additions, 43 FP multiplications,
Chris@82 32 * (or, 21 additions, 0 multiplications, 43 fused multiply/add),
Chris@82 33 * 42 stack variables, 9 constants, and 30 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cbIII.h"
Chris@82 36
/*
 * r2cbIII_15, FMA-oriented variant: this is the definition compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * The file is machine-generated (the genfft command line is recorded in the
 * comment preceding this function), so any change to the arithmetic belongs
 * in the generator, not here.
 *
 * Per loop iteration the body
 *   - reads Cr[0] and Cr[WS(csr, k)] for k = 1..7   (8 loads),
 *   - reads Ci[0] and Ci[WS(csi, k)] for k = 1..6   (7 loads),
 *   - writes R0[0] and R0[WS(rs, k)] for k = 1..7   (8 stores),
 *   - writes R1[0] and R1[WS(rs, k)] for k = 1..6   (7 stores).
 * The loop runs v times; after each iteration R0 and R1 advance by ovs and
 * Cr and Ci advance by ivs.  Nothing is returned.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE come from headers that are not part of this file.
 * FMA(a, b, c), FNMS(a, b, c) and FMS(a, b, c) are presumably a*b + c,
 * c - a*b and a*b - c respectively -- confirm against the kernel headers.
 * NOTE(review): the generator flags (-n 15, -dft-III, -sign 1) suggest a
 * size-15 backward type-III real transform -- confirm before relying on it.
 */
Chris@82 37 static void r2cbIII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Nine constants declared through DK; each KP name spells the leading digits
   of its value (e.g. KP1_732050807 is sqrt(3) to the printed precision). */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@82 42 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 43 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 44 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@82 45 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 46 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 47 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 48 {
Chris@82 49 INT i;
/* i counts down from v; the increment clause also steps the four data
   pointers and invokes MAKE_VOLATILE_STRIDE on rs, csr and csi. */
Chris@82 50 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
Chris@82 51 E Tk, TA, T5, Th, Tz, T6, Tn, TX, TR, Td, Tm, TI, Tv, TN, TD;
Chris@82 52 E TL, TM, Ti, Tj, T12, Te, T11;
/* Ci[4] and Ci[1]: Tk later feeds Tl/Tx, TA later feeds TB/TF. */
Chris@82 53 Ti = Ci[WS(csi, 4)];
Chris@82 54 Tj = Ci[WS(csi, 1)];
Chris@82 55 Tk = FMA(KP618033988, Tj, Ti);
Chris@82 56 TA = FNMS(KP618033988, Ti, Tj);
/* Cr[7], Cr[4], Cr[1]: T5 goes to R0[0] and T11; Th and Tz seed the two
   six-output groups further down. */
Chris@82 57 {
Chris@82 58 E T1, T4, Tg, T2, T3, Tf;
Chris@82 59 T1 = Cr[WS(csr, 7)];
Chris@82 60 T2 = Cr[WS(csr, 4)];
Chris@82 61 T3 = Cr[WS(csr, 1)];
Chris@82 62 T4 = T2 + T3;
Chris@82 63 Tg = T2 - T3;
Chris@82 64 T5 = FMA(KP2_000000000, T4, T1);
Chris@82 65 Tf = FNMS(KP500000000, T4, T1);
Chris@82 66 Th = FMA(KP1_118033988, Tg, Tf);
Chris@82 67 Tz = FNMS(KP1_118033988, Tg, Tf);
Chris@82 68 }
/* Cr[2], Cr[3], Cr[6], Cr[0], Cr[5]: sums and differences of the (3,6) and
   (0,5) pairs, combined into Tn, TX, TR, Td and Tm. */
Chris@82 69 {
Chris@82 70 E Tc, TP, T9, TQ;
Chris@82 71 T6 = Cr[WS(csr, 2)];
Chris@82 72 {
Chris@82 73 E Ta, Tb, T7, T8;
Chris@82 74 Ta = Cr[WS(csr, 3)];
Chris@82 75 Tb = Cr[WS(csr, 6)];
Chris@82 76 Tc = Ta + Tb;
Chris@82 77 TP = Ta - Tb;
Chris@82 78 T7 = Cr[0];
Chris@82 79 T8 = Cr[WS(csr, 5)];
Chris@82 80 T9 = T7 + T8;
Chris@82 81 TQ = T7 - T8;
Chris@82 82 }
Chris@82 83 Tn = T9 - Tc;
Chris@82 84 TX = FMA(KP618033988, TP, TQ);
Chris@82 85 TR = FNMS(KP618033988, TQ, TP);
Chris@82 86 Td = T9 + Tc;
Chris@82 87 Tm = FNMS(KP250000000, Td, T6);
Chris@82 88 }
/* Ci[2], Ci[3], Ci[6], Ci[0], Ci[5]: the same (3,6) and (0,5) pairing applied
   to the Ci inputs, combined into Tv, TN, TD, TL and TM. */
Chris@82 89 {
Chris@82 90 E Tu, TK, Tr, TJ;
Chris@82 91 TI = Ci[WS(csi, 2)];
Chris@82 92 {
Chris@82 93 E Ts, Tt, Tp, Tq;
Chris@82 94 Ts = Ci[WS(csi, 3)];
Chris@82 95 Tt = Ci[WS(csi, 6)];
Chris@82 96 Tu = Ts - Tt;
Chris@82 97 TK = Ts + Tt;
Chris@82 98 Tp = Ci[0];
Chris@82 99 Tq = Ci[WS(csi, 5)];
Chris@82 100 Tr = Tp + Tq;
Chris@82 101 TJ = Tq - Tp;
Chris@82 102 }
Chris@82 103 Tv = FMA(KP618033988, Tu, Tr);
Chris@82 104 TN = TJ + TK;
Chris@82 105 TD = FNMS(KP618033988, Tr, Tu);
Chris@82 106 TL = TJ - TK;
Chris@82 107 TM = FNMS(KP250000000, TL, TI);
Chris@82 108 }
/* Three outputs that need only T5, Te and T12: R0[0], R0[5], R1[2]. */
Chris@82 109 T12 = TL + TI;
Chris@82 110 Te = T6 + Td;
Chris@82 111 T11 = Te - T5;
Chris@82 112 R0[0] = FMA(KP2_000000000, Te, T5);
Chris@82 113 R0[WS(rs, 5)] = FMS(KP1_732050807, T12, T11);
Chris@82 114 R1[WS(rs, 2)] = FMA(KP1_732050807, T12, T11);
/* Output group built from Tz, TA, TD, TR together with Tm, Tn, TM, TN:
   stores R0[6], R1[1], R0[1], R1[3], R0[4], R1[6]. */
Chris@82 115 {
Chris@82 116 E TB, TF, TE, TG, TS, TU, TC, TO, TH, TT;
Chris@82 117 TB = FNMS(KP1_902113032, TA, Tz);
Chris@82 118 TF = FMA(KP1_902113032, TA, Tz);
Chris@82 119 TC = FNMS(KP559016994, Tn, Tm);
Chris@82 120 TE = FMA(KP951056516, TD, TC);
Chris@82 121 TG = FNMS(KP951056516, TD, TC);
Chris@82 122 TO = FNMS(KP559016994, TN, TM);
Chris@82 123 TS = FMA(KP951056516, TR, TO);
Chris@82 124 TU = FNMS(KP951056516, TR, TO);
Chris@82 125 R0[WS(rs, 6)] = FMA(KP2_000000000, TE, TB);
Chris@82 126 R1[WS(rs, 1)] = -(FMA(KP2_000000000, TG, TF));
Chris@82 127 TH = TB - TE;
Chris@82 128 R0[WS(rs, 1)] = FNMS(KP1_732050807, TS, TH);
Chris@82 129 R1[WS(rs, 3)] = -(FMA(KP1_732050807, TS, TH));
Chris@82 130 TT = TF - TG;
Chris@82 131 R0[WS(rs, 4)] = FNMS(KP1_732050807, TU, TT);
Chris@82 132 R1[WS(rs, 6)] = -(FMA(KP1_732050807, TU, TT));
Chris@82 133 }
/* Output group built from Th, Tk, Tv, TX together with Tm, Tn, TM, TN:
   stores R1[4], R0[3], R1[0], R1[5], R0[7], R0[2]. */
Chris@82 134 {
Chris@82 135 E Tl, Tx, Tw, Ty, TY, T10, To, TW, TV, TZ;
Chris@82 136 Tl = FNMS(KP1_902113032, Tk, Th);
Chris@82 137 Tx = FMA(KP1_902113032, Tk, Th);
Chris@82 138 To = FMA(KP559016994, Tn, Tm);
Chris@82 139 Tw = FMA(KP951056516, Tv, To);
Chris@82 140 Ty = FNMS(KP951056516, Tv, To);
Chris@82 141 TW = FMA(KP559016994, TN, TM);
Chris@82 142 TY = FNMS(KP951056516, TX, TW);
Chris@82 143 T10 = FMA(KP951056516, TX, TW);
Chris@82 144 R1[WS(rs, 4)] = -(FMA(KP2_000000000, Tw, Tl));
Chris@82 145 R0[WS(rs, 3)] = FMA(KP2_000000000, Ty, Tx);
Chris@82 146 TV = Ty - Tx;
Chris@82 147 R1[0] = FNMS(KP1_732050807, TY, TV);
Chris@82 148 R1[WS(rs, 5)] = FMA(KP1_732050807, TY, TV);
Chris@82 149 TZ = Tl - Tw;
Chris@82 150 R0[WS(rs, 7)] = FNMS(KP1_732050807, T10, TZ);
Chris@82 151 R0[WS(rs, 2)] = FMA(KP1_732050807, T10, TZ);
Chris@82 152 }
Chris@82 153 }
Chris@82 154 }
Chris@82 155 }
Chris@82 156
/* Descriptor handed to the registration call for the FMA variant.  The
   {21, 0, 43, 0} initializer carries the same numbers as the generated
   header comment of this branch (21 additions, 0 multiplications, 43 fused
   multiply/add).  NOTE(review): kr2c_desc and GENUS are declared outside this
   file; the leading 15 is presumably the transform size -- confirm. */
Chris@82 157 static const kr2c_desc desc = { 15, "r2cbIII_15", {21, 0, 43, 0}, &GENUS };
Chris@82 158
/* Entry point for the FMA variant: passes the planner p, the r2cbIII_15
   function above and its descriptor to X(kr2c_register).
   NOTE(review): X() is a macro defined outside this file, presumably a
   symbol-prefixing helper -- confirm. */
Chris@82 159 void X(codelet_r2cbIII_15) (planner *p) {
Chris@82 160 X(kr2c_register) (p, r2cbIII_15, &desc);
Chris@82 161 }
Chris@82 162
Chris@82 163 #else
Chris@82 164
Chris@82 165 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cbIII_15 -dft-III -include rdft/scalar/r2cbIII.h */
Chris@82 166
Chris@82 167 /*
Chris@82 168 * This function contains 64 FP additions, 26 FP multiplications,
Chris@82 169 * (or, 49 additions, 11 multiplications, 15 fused multiply/add),
Chris@82 170 * 47 stack variables, 14 constants, and 30 memory accesses
Chris@82 171 */
Chris@82 172 #include "rdft/scalar/r2cbIII.h"
Chris@82 173
/*
 * r2cbIII_15, non-FMA variant: this is the definition compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined (the #else
 * branch of the file).
 *
 * The file is machine-generated (the genfft command line is recorded in the
 * comment preceding this function), so any change to the arithmetic belongs
 * in the generator, not here.
 *
 * Per loop iteration the body
 *   - reads Cr[0] and Cr[WS(csr, k)] for k = 1..7   (8 loads),
 *   - reads Ci[0] and Ci[WS(csi, k)] for k = 1..6   (7 loads),
 *   - writes R0[0] and R0[WS(rs, k)] for k = 1..7   (8 stores),
 *   - writes R1[0] and R1[WS(rs, k)] for k = 1..6   (7 stores).
 * The loop runs v times; after each iteration R0 and R1 advance by ovs and
 * Cr and Ci advance by ivs.  Nothing is returned.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS, FMS and
 * MAKE_VOLATILE_STRIDE come from headers that are not part of this file.
 * FMA(a, b, c), FNMS(a, b, c) and FMS(a, b, c) are presumably a*b + c,
 * c - a*b and a*b - c respectively -- confirm against the kernel headers.
 * NOTE(review): the generator flags (-n 15, -dft-III, -sign 1) suggest a
 * size-15 backward type-III real transform -- confirm before relying on it.
 */
Chris@82 174 static void r2cbIII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 175 {
/* Fourteen constants declared through DK; each KP name spells the leading
   digits of its value (e.g. KP1_732050807 is sqrt(3) to the printed
   precision). */
Chris@82 176 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 177 DK(KP433012701, +0.433012701892219323381861585376468091735701313);
Chris@82 178 DK(KP968245836, +0.968245836551854221294816349945599902708230426);
Chris@82 179 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 180 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 181 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 182 DK(KP1_647278207, +1.647278207092663851754840078556380006059321028);
Chris@82 183 DK(KP1_018073920, +1.018073920910254366901961726787815297021466329);
Chris@82 184 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 185 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 186 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 187 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@82 188 DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
Chris@82 189 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@82 190 {
Chris@82 191 INT i;
/* i counts down from v; the increment clause also steps the four data
   pointers and invokes MAKE_VOLATILE_STRIDE on rs, csr and csi. */
Chris@82 192 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
Chris@82 193 E Tv, TD, T5, Ts, TC, T6, Tf, TW, TK, Td, Tg, TP, To, TN, TA;
Chris@82 194 E TO, TQ, Tt, Tu, T12, Te, T11;
/* Ci[4] and Ci[1]: Tv later feeds Tw/Ty, TD later feeds TE/TG. */
Chris@82 195 Tt = Ci[WS(csi, 4)];
Chris@82 196 Tu = Ci[WS(csi, 1)];
Chris@82 197 Tv = FMA(KP1_902113032, Tt, KP1_175570504 * Tu);
Chris@82 198 TD = FNMS(KP1_175570504, Tt, KP1_902113032 * Tu);
/* Cr[7], Cr[4], Cr[1]: T5 goes to R0[0] and T11; Ts and TC seed the two
   six-output groups further down. */
Chris@82 199 {
Chris@82 200 E T1, T4, Tq, T2, T3, Tr;
Chris@82 201 T1 = Cr[WS(csr, 7)];
Chris@82 202 T2 = Cr[WS(csr, 4)];
Chris@82 203 T3 = Cr[WS(csr, 1)];
Chris@82 204 T4 = T2 + T3;
Chris@82 205 Tq = KP1_118033988 * (T2 - T3);
Chris@82 206 T5 = FMA(KP2_000000000, T4, T1);
Chris@82 207 Tr = FNMS(KP500000000, T4, T1);
Chris@82 208 Ts = Tq + Tr;
Chris@82 209 TC = Tr - Tq;
Chris@82 210 }
/* Cr[2], Cr[3], Cr[6], Cr[0], Cr[5]: sums and differences of the (3,6) and
   (0,5) pairs, combined into Tf, TW, TK, Td and Tg. */
Chris@82 211 {
Chris@82 212 E Tc, TJ, T9, TI;
Chris@82 213 T6 = Cr[WS(csr, 2)];
Chris@82 214 {
Chris@82 215 E Ta, Tb, T7, T8;
Chris@82 216 Ta = Cr[WS(csr, 3)];
Chris@82 217 Tb = Cr[WS(csr, 6)];
Chris@82 218 Tc = Ta + Tb;
Chris@82 219 TJ = Ta - Tb;
Chris@82 220 T7 = Cr[0];
Chris@82 221 T8 = Cr[WS(csr, 5)];
Chris@82 222 T9 = T7 + T8;
Chris@82 223 TI = T7 - T8;
Chris@82 224 }
Chris@82 225 Tf = KP559016994 * (T9 - Tc);
Chris@82 226 TW = FNMS(KP1_647278207, TJ, KP1_018073920 * TI);
Chris@82 227 TK = FMA(KP1_647278207, TI, KP1_018073920 * TJ);
Chris@82 228 Td = T9 + Tc;
Chris@82 229 Tg = FNMS(KP250000000, Td, T6);
Chris@82 230 }
/* Ci[2], Ci[3], Ci[6], Ci[0], Ci[5]: the same (3,6) and (0,5) pairing applied
   to the Ci inputs, combined into To, TN, TA, TO and TQ. */
Chris@82 231 {
Chris@82 232 E Tn, TM, Tk, TL;
Chris@82 233 TP = Ci[WS(csi, 2)];
Chris@82 234 {
Chris@82 235 E Tl, Tm, Ti, Tj;
Chris@82 236 Tl = Ci[WS(csi, 3)];
Chris@82 237 Tm = Ci[WS(csi, 6)];
Chris@82 238 Tn = Tl - Tm;
Chris@82 239 TM = Tl + Tm;
Chris@82 240 Ti = Ci[0];
Chris@82 241 Tj = Ci[WS(csi, 5)];
Chris@82 242 Tk = Ti + Tj;
Chris@82 243 TL = Ti - Tj;
Chris@82 244 }
Chris@82 245 To = FMA(KP951056516, Tk, KP587785252 * Tn);
Chris@82 246 TN = KP968245836 * (TL - TM);
Chris@82 247 TA = FNMS(KP587785252, Tk, KP951056516 * Tn);
Chris@82 248 TO = TL + TM;
Chris@82 249 TQ = FMA(KP433012701, TO, KP1_732050807 * TP);
Chris@82 250 }
/* Three outputs that need only T5, Te and T12: R0[0], R0[5], R1[2]. */
Chris@82 251 T12 = KP1_732050807 * (TP - TO);
Chris@82 252 Te = T6 + Td;
Chris@82 253 T11 = Te - T5;
Chris@82 254 R0[0] = FMA(KP2_000000000, Te, T5);
Chris@82 255 R0[WS(rs, 5)] = T12 - T11;
Chris@82 256 R1[WS(rs, 2)] = T11 + T12;
/* Output group built from TC, TD, TA, TW together with Tf, Tg, TN, TQ:
   stores R0[6], R1[1], R0[1], R1[3], R0[4], R1[6]. */
Chris@82 257 {
Chris@82 258 E TE, TG, TB, TF, TY, T10, Tz, TX, TV, TZ;
Chris@82 259 TE = TC - TD;
Chris@82 260 TG = TC + TD;
Chris@82 261 Tz = Tg - Tf;
Chris@82 262 TB = Tz + TA;
Chris@82 263 TF = TA - Tz;
Chris@82 264 TX = TN + TQ;
Chris@82 265 TY = TW - TX;
Chris@82 266 T10 = TW + TX;
Chris@82 267 R0[WS(rs, 6)] = FMA(KP2_000000000, TB, TE);
Chris@82 268 R1[WS(rs, 1)] = FMS(KP2_000000000, TF, TG);
Chris@82 269 TV = TE - TB;
Chris@82 270 R0[WS(rs, 1)] = TV + TY;
Chris@82 271 R1[WS(rs, 3)] = TY - TV;
Chris@82 272 TZ = TF + TG;
Chris@82 273 R0[WS(rs, 4)] = TZ - T10;
Chris@82 274 R1[WS(rs, 6)] = -(TZ + T10);
Chris@82 275 }
/* Output group built from Ts, Tv, To, TK together with Tf, Tg, TN, TQ:
   stores R1[4], R0[3], R1[5], R1[0], R0[2], R0[7]. */
Chris@82 276 {
Chris@82 277 E Tw, Ty, Tp, Tx, TS, TU, Th, TR, TH, TT;
Chris@82 278 Tw = Ts - Tv;
Chris@82 279 Ty = Ts + Tv;
Chris@82 280 Th = Tf + Tg;
Chris@82 281 Tp = Th + To;
Chris@82 282 Tx = Th - To;
Chris@82 283 TR = TN - TQ;
Chris@82 284 TS = TK + TR;
Chris@82 285 TU = TR - TK;
Chris@82 286 R1[WS(rs, 4)] = -(FMA(KP2_000000000, Tp, Tw));
Chris@82 287 R0[WS(rs, 3)] = FMA(KP2_000000000, Tx, Ty);
Chris@82 288 TH = Tx - Ty;
Chris@82 289 R1[WS(rs, 5)] = TH - TS;
Chris@82 290 R1[0] = TH + TS;
Chris@82 291 TT = Tw - Tp;
Chris@82 292 R0[WS(rs, 2)] = TT - TU;
Chris@82 293 R0[WS(rs, 7)] = TT + TU;
Chris@82 294 }
Chris@82 295 }
Chris@82 296 }
Chris@82 297 }
Chris@82 298
/* Descriptor handed to the registration call for the non-FMA variant.  The
   {49, 11, 15, 0} initializer carries the same numbers as the generated
   header comment of this branch (49 additions, 11 multiplications, 15 fused
   multiply/add).  NOTE(review): kr2c_desc and GENUS are declared outside this
   file; the leading 15 is presumably the transform size -- confirm. */
Chris@82 299 static const kr2c_desc desc = { 15, "r2cbIII_15", {49, 11, 15, 0}, &GENUS };
Chris@82 300
/* Entry point for the non-FMA variant: passes the planner p, the r2cbIII_15
   function above and its descriptor to X(kr2c_register).
   NOTE(review): X() is a macro defined outside this file, presumably a
   symbol-prefixing helper -- confirm. */
Chris@82 301 void X(codelet_r2cbIII_15) (planner *p) {
Chris@82 302 X(kr2c_register) (p, r2cbIII_15, &desc);
Chris@82 303 }
Chris@82 304
Chris@82 305 #endif