annotate src/fftw-3.3.5/rdft/scalar/r2cf/r2cf_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:46:05 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cf.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 13 -name r2cf_13 -include r2cf.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 76 FP additions, 51 FP multiplications,
Chris@42 32 * (or, 31 additions, 6 multiplications, 45 fused multiply/add),
Chris@42 33 * 68 stack variables, 23 constants, and 26 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cf.h"
Chris@42 36
Chris@42 37 static void r2cf_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* HAVE_FMA variant of the generated n=13 kernel: each iteration reads 7 values through R0 and 6 through R1, and writes 7 values through Cr and 6 through Ci */
Chris@42 38 {
Chris@42 39 DK(KP516520780, +0.516520780623489722840901288569017135705033622); /* DK is defined elsewhere (presumably declares a named constant); each name spells the leading digits of its value */
Chris@42 40 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@42 41 DK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@42 42 DK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@42 43 DK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@42 44 DK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@42 45 DK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@42 46 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@42 47 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@42 48 DK(KP904176221, +0.904176221990848204433795481776887926501523162);
Chris@42 49 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@42 50 DK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@42 51 DK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@42 52 DK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@42 53 DK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@42 54 DK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@42 55 DK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@42 56 DK(KP083333333, +0.083333333333333333333333333333333333333333333); /* numerically 1/12 */
Chris@42 57 DK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@42 58 DK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@42 59 DK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@42 60 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 61 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@42 62 {
Chris@42 63 INT i;
Chris@42 64 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) { /* runs v times; each step advances R0/R1 by ivs and Cr/Ci by ovs. NOTE(review): MAKE_VOLATILE_STRIDE is defined elsewhere; its effect is not visible here */
Chris@42 65 E T15, T1a, T11, T17, T14, T1b; /* assigned in the first sub-block below, consumed by the final Cr[1..6] stage */
Chris@42 66 {
Chris@42 67 E TN, TD, TV, TA, Tb, TZ, T12, TS, Tx, Tu, Ti, TU;
Chris@42 68 TN = R0[0]; /* kept apart from the other 12 inputs, which are combined pairwise below */
Chris@42 69 {
Chris@42 70 E T3, TP, Th, TB, Tp, Te, Tm, TC, Tr, T6, T9, Ts;
Chris@42 71 { /* loads of R0[WS(rs, 1..6)] and R1[0], R1[WS(rs, 1..5)], interleaved with their first sums and differences */
Chris@42 72 E Tn, Tf, Tg, T1, T2;
Chris@42 73 T1 = R0[WS(rs, 4)];
Chris@42 74 T2 = R1[WS(rs, 2)];
Chris@42 75 Tn = R0[WS(rs, 6)];
Chris@42 76 Tf = R0[WS(rs, 5)];
Chris@42 77 Tg = R0[WS(rs, 2)];
Chris@42 78 T3 = T1 - T2;
Chris@42 79 TP = T1 + T2;
Chris@42 80 {
Chris@42 81 E Tk, To, Tc, Td;
Chris@42 82 Tk = R1[0];
Chris@42 83 Th = Tf - Tg;
Chris@42 84 To = Tf + Tg;
Chris@42 85 Tc = R1[WS(rs, 4)];
Chris@42 86 Td = R1[WS(rs, 1)];
Chris@42 87 {
Chris@42 88 E T4, Tl, T5, T7, T8;
Chris@42 89 T4 = R1[WS(rs, 5)];
Chris@42 90 TB = Tn + To;
Chris@42 91 Tp = FMS(KP500000000, To, Tn);
Chris@42 92 Tl = Td + Tc;
Chris@42 93 Te = Tc - Td;
Chris@42 94 T5 = R0[WS(rs, 3)];
Chris@42 95 T7 = R1[WS(rs, 3)];
Chris@42 96 T8 = R0[WS(rs, 1)];
Chris@42 97 Tm = FNMS(KP500000000, Tl, Tk);
Chris@42 98 TC = Tk + Tl;
Chris@42 99 Tr = T4 + T5;
Chris@42 100 T6 = T4 - T5;
Chris@42 101 T9 = T7 - T8;
Chris@42 102 Ts = T7 + T8;
Chris@42 103 }
Chris@42 104 }
Chris@42 105 }
Chris@42 106 {
Chris@42 107 E TO, Ta, Tt, TQ;
Chris@42 108 TD = TB - TC;
Chris@42 109 TO = TC + TB;
Chris@42 110 Ta = T6 + T9;
Chris@42 111 TV = T6 - T9;
Chris@42 112 Tt = Tr - Ts;
Chris@42 113 TQ = Tr + Ts;
Chris@42 114 {
Chris@42 115 E TX, Tq, TR, TY;
Chris@42 116 TX = Tm - Tp;
Chris@42 117 Tq = Tm + Tp;
Chris@42 118 TA = T3 + Ta;
Chris@42 119 Tb = FNMS(KP500000000, Ta, T3);
Chris@42 120 TR = TP + TQ;
Chris@42 121 TY = FNMS(KP500000000, TQ, TP);
Chris@42 122 TZ = TX + TY;
Chris@42 123 T12 = TX - TY;
Chris@42 124 T15 = TO - TR;
Chris@42 125 TS = TO + TR; /* TS = sum of the 12 inputs other than R0[0] */
Chris@42 126 Tx = FNMS(KP866025403, Tt, Tq);
Chris@42 127 Tu = FMA(KP866025403, Tt, Tq);
Chris@42 128 Ti = Te + Th;
Chris@42 129 TU = Th - Te;
Chris@42 130 }
Chris@42 131 }
Chris@42 132 }
Chris@42 133 Cr[0] = TN + TS; /* Cr[0] is the plain sum of all 13 inputs */
Chris@42 134 {
Chris@42 135 E Tw, Tj, T13, TW;
Chris@42 136 Tw = FNMS(KP866025403, Ti, Tb);
Chris@42 137 Tj = FMA(KP866025403, Ti, Tb);
Chris@42 138 T13 = TU - TV;
Chris@42 139 TW = TU + TV;
Chris@42 140 {
Chris@42 141 E TE, TI, Tv, TF, TG, Ty;
Chris@42 142 TE = FMA(KP302775637, TD, TA);
Chris@42 143 TI = FNMS(KP302775637, TA, TD);
Chris@42 144 Tv = FMA(KP038632954, Tu, Tj);
Chris@42 145 TF = FNMS(KP038632954, Tj, Tu);
Chris@42 146 TG = FNMS(KP612264650, Tw, Tx);
Chris@42 147 Ty = FMA(KP612264650, Tx, Tw);
Chris@42 148 { /* all six Ci stores (indices 1..6) happen in this sub-block, along with T1a, T11 and T17 for the Cr stage */
Chris@42 149 E TT, Tz, TK, TH, TM, T10, TL, TJ;
Chris@42 150 TT = FNMS(KP083333333, TS, TN);
Chris@42 151 Tz = FNMS(KP853480001, Ty, Tv);
Chris@42 152 TK = FMA(KP853480001, Ty, Tv);
Chris@42 153 TH = FNMS(KP853480001, TG, TF);
Chris@42 154 TM = FMA(KP853480001, TG, TF);
Chris@42 155 T1a = FNMS(KP226109445, TW, TZ);
Chris@42 156 T10 = FMA(KP301479260, TZ, TW);
Chris@42 157 TL = FNMS(KP522026385, Tz, TE);
Chris@42 158 Ci[WS(csi, 1)] = KP600477271 * (FMA(KP957805992, TE, Tz));
Chris@42 159 TJ = FMA(KP522026385, TH, TI);
Chris@42 160 Ci[WS(csi, 5)] = -(KP600477271 * (FNMS(KP957805992, TI, TH)));
Chris@42 161 Ci[WS(csi, 4)] = -(KP575140729 * (FMA(KP904176221, TM, TL)));
Chris@42 162 Ci[WS(csi, 3)] = KP575140729 * (FNMS(KP904176221, TM, TL));
Chris@42 163 Ci[WS(csi, 6)] = KP575140729 * (FMA(KP904176221, TK, TJ));
Chris@42 164 Ci[WS(csi, 2)] = KP575140729 * (FNMS(KP904176221, TK, TJ));
Chris@42 165 T11 = FMA(KP503537032, T10, TT);
Chris@42 166 T17 = FNMS(KP251768516, T10, TT);
Chris@42 167 }
Chris@42 168 T14 = FNMS(KP514918778, T13, T12);
Chris@42 169 T1b = FMA(KP686558370, T12, T13);
Chris@42 170 }
Chris@42 171 }
Chris@42 172 }
Chris@42 173 { /* final stage: combines T11, T17, T14, T15, T1a and T1b into Cr[WS(csr, 1..6)] */
Chris@42 174 E T1e, T1c, T18, T16, T1d, T19;
Chris@42 175 T1e = FMA(KP769338817, T1b, T1a);
Chris@42 176 T1c = FNMS(KP769338817, T1b, T1a);
Chris@42 177 T18 = FNMS(KP859542535, T14, T15);
Chris@42 178 T16 = FMA(KP581704778, T15, T14);
Chris@42 179 T1d = FNMS(KP300462606, T18, T17);
Chris@42 180 T19 = FMA(KP300462606, T18, T17);
Chris@42 181 Cr[WS(csr, 1)] = FMA(KP516520780, T16, T11);
Chris@42 182 Cr[WS(csr, 5)] = FNMS(KP516520780, T16, T11);
Chris@42 183 Cr[WS(csr, 2)] = FMA(KP503537032, T1e, T1d);
Chris@42 184 Cr[WS(csr, 6)] = FNMS(KP503537032, T1e, T1d);
Chris@42 185 Cr[WS(csr, 3)] = FMA(KP503537032, T1c, T19);
Chris@42 186 Cr[WS(csr, 4)] = FNMS(KP503537032, T1c, T19);
Chris@42 187 }
Chris@42 188 }
Chris@42 189 }
Chris@42 190 }
Chris@42 191
Chris@42 192 static const kr2c_desc desc = { 13, "r2cf_13", {31, 6, 45, 0}, &GENUS }; /* descriptor for the HAVE_FMA kernel: 13, the kernel name, then {31, 6, 45, 0}, whose first three entries match the add / multiply / fused multiply-add counts in this variant's header comment; the trailing 0 and GENUS are defined elsewhere */
Chris@42 193
Chris@42 194 void X(codelet_r2cf_13) (planner *p) { /* registration entry point for the HAVE_FMA build; X(...) is a project macro defined elsewhere */
Chris@42 195 X(kr2c_register) (p, r2cf_13, &desc); /* passes the kernel function and its descriptor to the planner p */
Chris@42 196 }
Chris@42 197
Chris@42 198 #else /* HAVE_FMA */
Chris@42 199
Chris@42 200 /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 13 -name r2cf_13 -include r2cf.h */
Chris@42 201
Chris@42 202 /*
Chris@42 203 * This function contains 76 FP additions, 34 FP multiplications,
Chris@42 204 * (or, 57 additions, 15 multiplications, 19 fused multiply/add),
Chris@42 205 * 55 stack variables, 20 constants, and 26 memory accesses
Chris@42 206 */
Chris@42 207 #include "r2cf.h"
Chris@42 208
Chris@42 209 static void r2cf_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) /* variant compiled when HAVE_FMA is not defined: each iteration reads 7 values through R0 and 6 through R1, and writes 7 values through Cr and 6 through Ci */
Chris@42 210 {
Chris@42 211 DK(KP083333333, +0.083333333333333333333333333333333333333333333); /* numerically 1/12; DK is defined elsewhere (presumably declares a named constant), and each name spells the leading digits of its value */
Chris@42 212 DK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@42 213 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@42 214 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@42 215 DK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@42 216 DK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@42 217 DK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@42 218 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@42 219 DK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@42 220 DK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@42 221 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 222 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
Chris@42 223 DK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@42 224 DK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@42 225 DK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@42 226 DK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@42 227 DK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@42 228 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@42 229 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@42 230 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@42 231 {
Chris@42 232 INT i;
Chris@42 233 for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) { /* runs v times; each step advances R0/R1 by ivs and Cr/Ci by ovs. NOTE(review): MAKE_VOLATILE_STRIDE is defined elsewhere; its effect is not visible here */
Chris@42 234 E T13, Tb, Tm, TW, TX, T14, TU, T10, Tz, TB, Tu, TC, TR, T11; /* intermediates shared between the input stage and the two output stages below */
Chris@42 235 T13 = R0[0]; /* kept apart from the other 12 inputs, which are combined in the block below */
Chris@42 236 { /* input stage: loads R0[WS(rs, 1..6)] and R1[0], R1[WS(rs, 1..5)] and forms their sums and differences */
Chris@42 237 E Te, TO, Ta, Tv, To, T5, Tw, Tp, Th, Tr, Tk, Ts, Tl, TP, Tc;
Chris@42 238 E Td;
Chris@42 239 Tc = R0[WS(rs, 4)];
Chris@42 240 Td = R1[WS(rs, 2)];
Chris@42 241 Te = Tc - Td;
Chris@42 242 TO = Tc + Td;
Chris@42 243 {
Chris@42 244 E T6, T7, T8, T9;
Chris@42 245 T6 = R1[0];
Chris@42 246 T7 = R1[WS(rs, 1)];
Chris@42 247 T8 = R1[WS(rs, 4)];
Chris@42 248 T9 = T7 + T8;
Chris@42 249 Ta = T6 + T9;
Chris@42 250 Tv = T7 - T8;
Chris@42 251 To = FNMS(KP500000000, T9, T6);
Chris@42 252 }
Chris@42 253 {
Chris@42 254 E T1, T2, T3, T4;
Chris@42 255 T1 = R0[WS(rs, 6)];
Chris@42 256 T2 = R0[WS(rs, 5)];
Chris@42 257 T3 = R0[WS(rs, 2)];
Chris@42 258 T4 = T2 + T3;
Chris@42 259 T5 = T1 + T4;
Chris@42 260 Tw = T2 - T3;
Chris@42 261 Tp = FNMS(KP500000000, T4, T1);
Chris@42 262 }
Chris@42 263 {
Chris@42 264 E Tf, Tg, Ti, Tj;
Chris@42 265 Tf = R1[WS(rs, 5)];
Chris@42 266 Tg = R0[WS(rs, 3)];
Chris@42 267 Th = Tf - Tg;
Chris@42 268 Tr = Tf + Tg;
Chris@42 269 Ti = R1[WS(rs, 3)];
Chris@42 270 Tj = R0[WS(rs, 1)];
Chris@42 271 Tk = Ti - Tj;
Chris@42 272 Ts = Ti + Tj;
Chris@42 273 }
Chris@42 274 Tl = Th + Tk;
Chris@42 275 TP = Tr + Ts;
Chris@42 276 Tb = T5 - Ta;
Chris@42 277 Tm = Te + Tl;
Chris@42 278 TW = Ta + T5;
Chris@42 279 TX = TO + TP;
Chris@42 280 T14 = TW + TX; /* T14 = sum of the 12 inputs other than R0[0] */
Chris@42 281 {
Chris@42 282 E TS, TT, Tx, Ty;
Chris@42 283 TS = Tv + Tw;
Chris@42 284 TT = Th - Tk;
Chris@42 285 TU = TS - TT;
Chris@42 286 T10 = TS + TT;
Chris@42 287 Tx = KP866025403 * (Tv - Tw);
Chris@42 288 Ty = FNMS(KP500000000, Tl, Te);
Chris@42 289 Tz = Tx + Ty;
Chris@42 290 TB = Ty - Tx;
Chris@42 291 }
Chris@42 292 {
Chris@42 293 E Tq, Tt, TN, TQ;
Chris@42 294 Tq = To - Tp;
Chris@42 295 Tt = KP866025403 * (Tr - Ts);
Chris@42 296 Tu = Tq - Tt;
Chris@42 297 TC = Tq + Tt;
Chris@42 298 TN = To + Tp;
Chris@42 299 TQ = FNMS(KP500000000, TP, TO);
Chris@42 300 TR = TN - TQ;
Chris@42 301 T11 = TN + TQ;
Chris@42 302 }
Chris@42 303 }
Chris@42 304 Cr[0] = T13 + T14; /* Cr[0] is the plain sum of all 13 inputs */
Chris@42 305 { /* Ci stage: writes Ci[WS(csi, 1..6)] from Tb, Tm, Tz, TB, Tu and TC */
Chris@42 306 E Tn, TG, TE, TF, TJ, TM, TK, TL;
Chris@42 307 Tn = FNMS(KP174138601, Tm, KP575140729 * Tb);
Chris@42 308 TG = FMA(KP174138601, Tb, KP575140729 * Tm);
Chris@42 309 {
Chris@42 310 E TA, TD, TH, TI;
Chris@42 311 TA = FNMS(KP156891391, Tz, KP256247671 * Tu);
Chris@42 312 TD = FNMS(KP300238635, TC, KP011599105 * TB);
Chris@42 313 TE = TA + TD;
Chris@42 314 TF = KP1_732050807 * (TD - TA);
Chris@42 315 TH = FMA(KP300238635, TB, KP011599105 * TC);
Chris@42 316 TI = FMA(KP256247671, Tz, KP156891391 * Tu);
Chris@42 317 TJ = TH - TI;
Chris@42 318 TM = KP1_732050807 * (TI + TH);
Chris@42 319 }
Chris@42 320 Ci[WS(csi, 5)] = FMA(KP2_000000000, TE, Tn);
Chris@42 321 Ci[WS(csi, 1)] = FMA(KP2_000000000, TJ, TG);
Chris@42 322 TK = TG - TJ;
Chris@42 323 Ci[WS(csi, 4)] = TF - TK;
Chris@42 324 Ci[WS(csi, 3)] = TF + TK;
Chris@42 325 TL = Tn - TE;
Chris@42 326 Ci[WS(csi, 2)] = TL - TM;
Chris@42 327 Ci[WS(csi, 6)] = TL + TM;
Chris@42 328 }
Chris@42 329 { /* Cr stage: writes Cr[WS(csr, 1..6)] from TR, TU, TW, TX, T10, T11, T13 and T14 */
Chris@42 330 E TZ, T1b, T19, T1e, T16, T1a, TV, TY, T1c, T1d;
Chris@42 331 TV = FNMS(KP132983124, TU, KP258260390 * TR);
Chris@42 332 TY = KP300462606 * (TW - TX);
Chris@42 333 TZ = FMA(KP2_000000000, TV, TY);
Chris@42 334 T1b = TY - TV;
Chris@42 335 {
Chris@42 336 E T17, T18, T12, T15;
Chris@42 337 T17 = FMA(KP387390585, TU, KP265966249 * TR);
Chris@42 338 T18 = FNMS(KP503537032, T11, KP113854479 * T10);
Chris@42 339 T19 = T17 - T18;
Chris@42 340 T1e = T17 + T18;
Chris@42 341 T12 = FMA(KP251768516, T10, KP075902986 * T11);
Chris@42 342 T15 = FNMS(KP083333333, T14, T13);
Chris@42 343 T16 = FMA(KP2_000000000, T12, T15);
Chris@42 344 T1a = T15 - T12;
Chris@42 345 }
Chris@42 346 Cr[WS(csr, 1)] = TZ + T16;
Chris@42 347 Cr[WS(csr, 5)] = T16 - TZ;
Chris@42 348 T1c = T1a - T1b;
Chris@42 349 Cr[WS(csr, 2)] = T19 + T1c;
Chris@42 350 Cr[WS(csr, 6)] = T1c - T19;
Chris@42 351 T1d = T1b + T1a;
Chris@42 352 Cr[WS(csr, 3)] = T1d - T1e;
Chris@42 353 Cr[WS(csr, 4)] = T1e + T1d;
Chris@42 354 }
Chris@42 355 }
Chris@42 356 }
Chris@42 357 }
Chris@42 358
Chris@42 359 static const kr2c_desc desc = { 13, "r2cf_13", {57, 15, 19, 0}, &GENUS }; /* descriptor for the non-FMA kernel: 13, the kernel name, then {57, 15, 19, 0}, whose first three entries match the add / multiply / fused multiply-add counts in this variant's header comment; the trailing 0 and GENUS are defined elsewhere */
Chris@42 360
Chris@42 361 void X(codelet_r2cf_13) (planner *p) { /* registration entry point for the non-FMA build; X(...) is a project macro defined elsewhere */
Chris@42 362 X(kr2c_register) (p, r2cf_13, &desc); /* passes the kernel function and its descriptor to the planner p */
Chris@42 363 }
Chris@42 364
Chris@42 365 #endif /* HAVE_FMA */