annotate src/fftw-3.3.8/rdft/scalar/r2cf/r2cf_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:26 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 13 -name r2cf_13 -include rdft/scalar/r2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 76 FP additions, 51 FP multiplications,
Chris@82 32 * (or, 31 additions, 6 multiplications, 45 fused multiply/add),
Chris@82 33 * 58 stack variables, 23 constants, and 26 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cf.h"
Chris@82 36
/*
 * r2cf_13, FMA-oriented variant (selected when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).  Machine-generated by genfft's
 * gen_r2cf with "-fma ... -n 13"; statement order is part of the generated
 * schedule and should not be rearranged by hand.
 *
 * Per loop iteration the code reads 13 input values
 *   R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..5)]
 * and writes 13 output values
 *   Cr[0], Cr[WS(csr, 1..6)], Ci[WS(csi, 1..6)]
 * (Ci[0] is never written).  The loop body executes v times when v > 0;
 * after each pass R0 and R1 advance by ivs, and Cr and Ci advance by ovs.
 *
 * NOTE(review): going by the generator name, R0/R1 presumably carry the
 * even-/odd-indexed real input samples and Cr/Ci the real/imaginary output
 * parts of a size-13 forward real-input transform -- confirm against the
 * codelet headers.  DK, E, R, INT, stride, WS, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE are all defined outside this file.
 */
Chris@82 37 static void r2cf_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@82 40 DK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@82 41 DK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@82 42 DK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@82 43 DK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@82 44 DK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@82 45 DK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@82 46 DK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@82 47 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@82 48 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@82 49 DK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@82 50 DK(KP083333333, +0.083333333333333333333333333333333333333333333); /* 1/12 */
Chris@82 51 DK(KP904176221, +0.904176221990848204433795481776887926501523162);
Chris@82 52 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@82 53 DK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@82 54 DK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@82 55 DK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@82 56 DK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@82 57 DK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@82 58 DK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@82 59 DK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@82 60 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* ~ sqrt(3)/2 */
Chris@82 61 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 62 {
Chris@82 63 INT i;
/* One pass per i, counting down from v; the pointer bumps live in the loop's step clause. */
Chris@82 64 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) {
Chris@82 65 E TN, TA, TD, TO, TR, TS, TZ, T12, Tu, Tx, Tj, Tw, TW, T13;
Chris@82 66 TN = R0[0]; /* feeds Cr[0] directly and the Cr[1..6] outputs through TT */
Chris@82 67 { /* load the other 12 inputs and combine them into the shared intermediates declared above */
Chris@82 68 E T3, TP, Th, TB, Tp, Te, TC, Tm, T6, Tr, T9, Ts, Ta, TQ, T1;
Chris@82 69 E T2;
Chris@82 70 T1 = R0[WS(rs, 4)];
Chris@82 71 T2 = R1[WS(rs, 2)];
Chris@82 72 T3 = T1 - T2;
Chris@82 73 TP = T1 + T2;
Chris@82 74 {
Chris@82 75 E Tn, Tf, Tg, To;
Chris@82 76 Tn = R0[WS(rs, 6)];
Chris@82 77 Tf = R0[WS(rs, 5)];
Chris@82 78 Tg = R0[WS(rs, 2)];
Chris@82 79 To = Tf + Tg;
Chris@82 80 Th = Tf - Tg;
Chris@82 81 TB = Tn + To;
Chris@82 82 Tp = FMS(KP500000000, To, Tn);
Chris@82 83 }
Chris@82 84 {
Chris@82 85 E Tk, Tc, Td, Tl;
Chris@82 86 Tk = R1[0];
Chris@82 87 Tc = R1[WS(rs, 4)];
Chris@82 88 Td = R1[WS(rs, 1)];
Chris@82 89 Tl = Td + Tc;
Chris@82 90 Te = Tc - Td;
Chris@82 91 TC = Tk + Tl;
Chris@82 92 Tm = FNMS(KP500000000, Tl, Tk);
Chris@82 93 }
Chris@82 94 {
Chris@82 95 E T4, T5, T7, T8;
Chris@82 96 T4 = R1[WS(rs, 5)];
Chris@82 97 T5 = R0[WS(rs, 3)];
Chris@82 98 T6 = T4 - T5;
Chris@82 99 Tr = T4 + T5;
Chris@82 100 T7 = R1[WS(rs, 3)];
Chris@82 101 T8 = R0[WS(rs, 1)];
Chris@82 102 T9 = T7 - T8;
Chris@82 103 Ts = T7 + T8;
Chris@82 104 }
Chris@82 105 Ta = T6 + T9;
Chris@82 106 TQ = Tr + Ts;
Chris@82 107 TA = T3 + Ta;
Chris@82 108 TD = TB - TC;
Chris@82 109 TO = TC + TB;
Chris@82 110 TR = TP + TQ;
Chris@82 111 TS = TO + TR; /* sum of the 12 inputs other than R0[0] */
Chris@82 112 {
Chris@82 113 E TX, TY, Tq, Tt;
Chris@82 114 TX = Tm - Tp;
Chris@82 115 TY = FNMS(KP500000000, TQ, TP);
Chris@82 116 TZ = TX + TY;
Chris@82 117 T12 = TX - TY;
Chris@82 118 Tq = Tm + Tp;
Chris@82 119 Tt = Tr - Ts;
Chris@82 120 Tu = FMA(KP866025403, Tt, Tq);
Chris@82 121 Tx = FNMS(KP866025403, Tt, Tq);
Chris@82 122 }
Chris@82 123 {
Chris@82 124 E Tb, Ti, TU, TV;
Chris@82 125 Tb = FNMS(KP500000000, Ta, T3);
Chris@82 126 Ti = Te + Th;
Chris@82 127 Tj = FMA(KP866025403, Ti, Tb);
Chris@82 128 Tw = FNMS(KP866025403, Ti, Tb);
Chris@82 129 TU = Th - Te;
Chris@82 130 TV = T6 - T9;
Chris@82 131 TW = TU + TV;
Chris@82 132 T13 = TU - TV;
Chris@82 133 }
Chris@82 134 }
Chris@82 135 Cr[0] = TN + TS; /* plain sum of all 13 inputs */
Chris@82 136 { /* Ci[1..6]: computed from TA, TD, Tj, Tu, Tw and Tx only */
Chris@82 137 E TE, TI, Tz, TK, TH, TM, TJ, TL;
Chris@82 138 TE = FMA(KP302775637, TD, TA);
Chris@82 139 TI = FNMS(KP302775637, TA, TD);
Chris@82 140 {
Chris@82 141 E Tv, Ty, TF, TG;
Chris@82 142 Tv = FMA(KP038632954, Tu, Tj);
Chris@82 143 Ty = FMA(KP612264650, Tx, Tw);
Chris@82 144 Tz = FNMS(KP853480001, Ty, Tv);
Chris@82 145 TK = FMA(KP853480001, Ty, Tv);
Chris@82 146 TF = FNMS(KP038632954, Tj, Tu);
Chris@82 147 TG = FNMS(KP612264650, Tw, Tx);
Chris@82 148 TH = FNMS(KP853480001, TG, TF);
Chris@82 149 TM = FMA(KP853480001, TG, TF);
Chris@82 150 }
Chris@82 151 Ci[WS(csi, 1)] = KP600477271 * (FMA(KP957805992, TE, Tz));
Chris@82 152 Ci[WS(csi, 5)] = -(KP600477271 * (FNMS(KP957805992, TI, TH)));
Chris@82 153 TJ = FMA(KP522026385, TH, TI);
Chris@82 154 Ci[WS(csi, 2)] = KP575140729 * (FNMS(KP904176221, TK, TJ));
Chris@82 155 Ci[WS(csi, 6)] = KP575140729 * (FMA(KP904176221, TK, TJ));
Chris@82 156 TL = FNMS(KP522026385, Tz, TE);
Chris@82 157 Ci[WS(csi, 3)] = KP575140729 * (FNMS(KP904176221, TM, TL));
Chris@82 158 Ci[WS(csi, 4)] = -(KP575140729 * (FMA(KP904176221, TM, TL)));
Chris@82 159 }
Chris@82 160 { /* Cr[1..6]: computed from TN, TS, TO, TR, TZ, TW, T12 and T13 */
Chris@82 161 E T11, T17, T1c, T1e, T16, T18, TT, T10, T19, T1d;
Chris@82 162 TT = FNMS(KP083333333, TS, TN);
Chris@82 163 T10 = FMA(KP301479260, TZ, TW);
Chris@82 164 T11 = FMA(KP503537032, T10, TT);
Chris@82 165 T17 = FNMS(KP251768516, T10, TT);
Chris@82 166 {
Chris@82 167 E T1a, T1b, T14, T15;
Chris@82 168 T1a = FNMS(KP226109445, TW, TZ);
Chris@82 169 T1b = FMA(KP686558370, T12, T13);
Chris@82 170 T1c = FNMS(KP769338817, T1b, T1a);
Chris@82 171 T1e = FMA(KP769338817, T1b, T1a);
Chris@82 172 T14 = FNMS(KP514918778, T13, T12);
Chris@82 173 T15 = TO - TR;
Chris@82 174 T16 = FMA(KP581704778, T15, T14);
Chris@82 175 T18 = FNMS(KP859542535, T14, T15);
Chris@82 176 }
Chris@82 177 Cr[WS(csr, 5)] = FNMS(KP516520780, T16, T11);
Chris@82 178 Cr[WS(csr, 1)] = FMA(KP516520780, T16, T11);
Chris@82 179 T19 = FMA(KP300462606, T18, T17);
Chris@82 180 Cr[WS(csr, 4)] = FNMS(KP503537032, T1c, T19);
Chris@82 181 Cr[WS(csr, 3)] = FMA(KP503537032, T1c, T19);
Chris@82 182 T1d = FNMS(KP300462606, T18, T17);
Chris@82 183 Cr[WS(csr, 6)] = FNMS(KP503537032, T1e, T1d);
Chris@82 184 Cr[WS(csr, 2)] = FMA(KP503537032, T1e, T1d);
Chris@82 185 }
Chris@82 186 }
Chris@82 187 }
Chris@82 188 }
Chris@82 189
/*
 * Descriptor handed to the planner together with r2cf_13 (FMA variant).
 * The triple 31 / 6 / 45 equals the additions / multiplications / fused
 * multiply-adds quoted in this variant's operation-count comment.
 * NOTE(review): the leading 13 is presumably the transform size; the meaning
 * of the trailing 0 and of GENUS is defined outside this file.
 */
Chris@82 190 static const kr2c_desc desc = { 13, "r2cf_13", {31, 6, 45, 0}, &GENUS };
Chris@82 191
/*
 * Registration entry point (FMA variant): passes the r2cf_13 function and its
 * descriptor to X(kr2c_register) for planner p.  X() is a macro defined
 * outside this file.
 */
Chris@82 192 void X(codelet_r2cf_13) (planner *p) {
Chris@82 193 X(kr2c_register) (p, r2cf_13, &desc);
Chris@82 194 }
Chris@82 195
Chris@82 196 #else
Chris@82 197
Chris@82 198 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 13 -name r2cf_13 -include rdft/scalar/r2cf.h */
Chris@82 199
Chris@82 200 /*
Chris@82 201 * This function contains 76 FP additions, 34 FP multiplications,
Chris@82 202 * (or, 57 additions, 15 multiplications, 19 fused multiply/add),
Chris@82 203 * 55 stack variables, 20 constants, and 26 memory accesses
Chris@82 204 */
Chris@82 205 #include "rdft/scalar/r2cf.h"
Chris@82 206
/*
 * r2cf_13, non-FMA variant (the #else branch, used when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 * Machine-generated by genfft's gen_r2cf with "-n 13" and without "-fma";
 * statement order is part of the generated schedule and should not be
 * rearranged by hand.
 *
 * Per loop iteration the code reads 13 input values
 *   R0[0], R0[WS(rs, 1..6)], R1[0], R1[WS(rs, 1..5)]
 * and writes 13 output values
 *   Cr[0], Cr[WS(csr, 1..6)], Ci[WS(csi, 1..6)]
 * (Ci[0] is never written).  The loop body executes v times when v > 0;
 * after each pass R0 and R1 advance by ivs, and Cr and Ci advance by ovs.
 *
 * NOTE(review): going by the generator name, R0/R1 presumably carry the
 * even-/odd-indexed real input samples and Cr/Ci the real/imaginary output
 * parts of a size-13 forward real-input transform -- confirm against the
 * codelet headers.  DK, E, R, INT, stride, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are all defined outside this file.
 */
Chris@82 207 static void r2cf_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 208 {
Chris@82 209 DK(KP083333333, +0.083333333333333333333333333333333333333333333); /* 1/12 */
Chris@82 210 DK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@82 211 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@82 212 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@82 213 DK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@82 214 DK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@82 215 DK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@82 216 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@82 217 DK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@82 218 DK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@82 219 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 220 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* ~ sqrt(3) */
Chris@82 221 DK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@82 222 DK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@82 223 DK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@82 224 DK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@82 225 DK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@82 226 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@82 227 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* ~ sqrt(3)/2 */
Chris@82 228 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 229 {
Chris@82 230 INT i;
/* One pass per i, counting down from v; the pointer bumps live in the loop's step clause. */
Chris@82 231 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) {
Chris@82 232 E T13, Tb, Tm, TW, TX, T14, TU, T10, Tz, TB, Tu, TC, TR, T11;
Chris@82 233 T13 = R0[0]; /* feeds Cr[0] directly and the Cr[1..6] outputs through T15 */
Chris@82 234 { /* load the other 12 inputs and combine them into the shared intermediates declared above */
Chris@82 235 E Te, TO, Ta, Tv, To, T5, Tw, Tp, Th, Tr, Tk, Ts, Tl, TP, Tc;
Chris@82 236 E Td;
Chris@82 237 Tc = R0[WS(rs, 4)];
Chris@82 238 Td = R1[WS(rs, 2)];
Chris@82 239 Te = Tc - Td;
Chris@82 240 TO = Tc + Td;
Chris@82 241 {
Chris@82 242 E T6, T7, T8, T9;
Chris@82 243 T6 = R1[0];
Chris@82 244 T7 = R1[WS(rs, 1)];
Chris@82 245 T8 = R1[WS(rs, 4)];
Chris@82 246 T9 = T7 + T8;
Chris@82 247 Ta = T6 + T9;
Chris@82 248 Tv = T7 - T8;
Chris@82 249 To = FNMS(KP500000000, T9, T6);
Chris@82 250 }
Chris@82 251 {
Chris@82 252 E T1, T2, T3, T4;
Chris@82 253 T1 = R0[WS(rs, 6)];
Chris@82 254 T2 = R0[WS(rs, 5)];
Chris@82 255 T3 = R0[WS(rs, 2)];
Chris@82 256 T4 = T2 + T3;
Chris@82 257 T5 = T1 + T4;
Chris@82 258 Tw = T2 - T3;
Chris@82 259 Tp = FNMS(KP500000000, T4, T1);
Chris@82 260 }
Chris@82 261 {
Chris@82 262 E Tf, Tg, Ti, Tj;
Chris@82 263 Tf = R1[WS(rs, 5)];
Chris@82 264 Tg = R0[WS(rs, 3)];
Chris@82 265 Th = Tf - Tg;
Chris@82 266 Tr = Tf + Tg;
Chris@82 267 Ti = R1[WS(rs, 3)];
Chris@82 268 Tj = R0[WS(rs, 1)];
Chris@82 269 Tk = Ti - Tj;
Chris@82 270 Ts = Ti + Tj;
Chris@82 271 }
Chris@82 272 Tl = Th + Tk;
Chris@82 273 TP = Tr + Ts;
Chris@82 274 Tb = T5 - Ta;
Chris@82 275 Tm = Te + Tl;
Chris@82 276 TW = Ta + T5;
Chris@82 277 TX = TO + TP;
Chris@82 278 T14 = TW + TX; /* sum of the 12 inputs other than R0[0] */
Chris@82 279 {
Chris@82 280 E TS, TT, Tx, Ty;
Chris@82 281 TS = Tv + Tw;
Chris@82 282 TT = Th - Tk;
Chris@82 283 TU = TS - TT;
Chris@82 284 T10 = TS + TT;
Chris@82 285 Tx = KP866025403 * (Tv - Tw);
Chris@82 286 Ty = FNMS(KP500000000, Tl, Te);
Chris@82 287 Tz = Tx + Ty;
Chris@82 288 TB = Ty - Tx;
Chris@82 289 }
Chris@82 290 {
Chris@82 291 E Tq, Tt, TN, TQ;
Chris@82 292 Tq = To - Tp;
Chris@82 293 Tt = KP866025403 * (Tr - Ts);
Chris@82 294 Tu = Tq - Tt;
Chris@82 295 TC = Tq + Tt;
Chris@82 296 TN = To + Tp;
Chris@82 297 TQ = FNMS(KP500000000, TP, TO);
Chris@82 298 TR = TN - TQ;
Chris@82 299 T11 = TN + TQ;
Chris@82 300 }
Chris@82 301 }
Chris@82 302 Cr[0] = T13 + T14; /* plain sum of all 13 inputs */
Chris@82 303 { /* Ci[1..6]: computed from Tb, Tm, Tz, Tu, TB and TC only */
Chris@82 304 E Tn, TG, TE, TF, TJ, TM, TK, TL;
Chris@82 305 Tn = FNMS(KP174138601, Tm, KP575140729 * Tb);
Chris@82 306 TG = FMA(KP174138601, Tb, KP575140729 * Tm);
Chris@82 307 {
Chris@82 308 E TA, TD, TH, TI;
Chris@82 309 TA = FNMS(KP156891391, Tz, KP256247671 * Tu);
Chris@82 310 TD = FNMS(KP300238635, TC, KP011599105 * TB);
Chris@82 311 TE = TA + TD;
Chris@82 312 TF = KP1_732050807 * (TD - TA);
Chris@82 313 TH = FMA(KP300238635, TB, KP011599105 * TC);
Chris@82 314 TI = FMA(KP256247671, Tz, KP156891391 * Tu);
Chris@82 315 TJ = TH - TI;
Chris@82 316 TM = KP1_732050807 * (TI + TH);
Chris@82 317 }
Chris@82 318 Ci[WS(csi, 5)] = FMA(KP2_000000000, TE, Tn);
Chris@82 319 Ci[WS(csi, 1)] = FMA(KP2_000000000, TJ, TG);
Chris@82 320 TK = TG - TJ;
Chris@82 321 Ci[WS(csi, 4)] = TF - TK;
Chris@82 322 Ci[WS(csi, 3)] = TF + TK;
Chris@82 323 TL = Tn - TE;
Chris@82 324 Ci[WS(csi, 2)] = TL - TM;
Chris@82 325 Ci[WS(csi, 6)] = TL + TM;
Chris@82 326 }
Chris@82 327 { /* Cr[1..6]: computed from T13, T14, TW, TX, TU, TR, T10 and T11 */
Chris@82 328 E TZ, T1b, T19, T1e, T16, T1a, TV, TY, T1c, T1d;
Chris@82 329 TV = FNMS(KP132983124, TU, KP258260390 * TR);
Chris@82 330 TY = KP300462606 * (TW - TX);
Chris@82 331 TZ = FMA(KP2_000000000, TV, TY);
Chris@82 332 T1b = TY - TV;
Chris@82 333 {
Chris@82 334 E T17, T18, T12, T15;
Chris@82 335 T17 = FMA(KP387390585, TU, KP265966249 * TR);
Chris@82 336 T18 = FNMS(KP503537032, T11, KP113854479 * T10);
Chris@82 337 T19 = T17 - T18;
Chris@82 338 T1e = T17 + T18;
Chris@82 339 T12 = FMA(KP251768516, T10, KP075902986 * T11);
Chris@82 340 T15 = FNMS(KP083333333, T14, T13);
Chris@82 341 T16 = FMA(KP2_000000000, T12, T15);
Chris@82 342 T1a = T15 - T12;
Chris@82 343 }
Chris@82 344 Cr[WS(csr, 1)] = TZ + T16;
Chris@82 345 Cr[WS(csr, 5)] = T16 - TZ;
Chris@82 346 T1c = T1a - T1b;
Chris@82 347 Cr[WS(csr, 2)] = T19 + T1c;
Chris@82 348 Cr[WS(csr, 6)] = T1c - T19;
Chris@82 349 T1d = T1b + T1a;
Chris@82 350 Cr[WS(csr, 3)] = T1d - T1e;
Chris@82 351 Cr[WS(csr, 4)] = T1e + T1d;
Chris@82 352 }
Chris@82 353 }
Chris@82 354 }
Chris@82 355 }
Chris@82 356
/*
 * Descriptor handed to the planner together with r2cf_13 (non-FMA variant).
 * The triple 57 / 15 / 19 equals the additions / multiplications / fused
 * multiply-adds quoted in this variant's operation-count comment.
 * NOTE(review): the leading 13 is presumably the transform size; the meaning
 * of the trailing 0 and of GENUS is defined outside this file.
 */
Chris@82 357 static const kr2c_desc desc = { 13, "r2cf_13", {57, 15, 19, 0}, &GENUS };
Chris@82 358
/*
 * Registration entry point (non-FMA variant): passes the r2cf_13 function and
 * its descriptor to X(kr2c_register) for planner p.  X() is a macro defined
 * outside this file.
 */
Chris@82 359 void X(codelet_r2cf_13) (planner *p) {
Chris@82 360 X(kr2c_register) (p, r2cf_13, &desc);
Chris@82 361 }
Chris@82 362
Chris@82 363 #endif