annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cb_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:27 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 13 -name r2cb_13 -include r2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 76 FP additions, 58 FP multiplications,
Chris@42 32 * (or, 18 additions, 0 multiplications, 58 fused multiply/add),
Chris@42 33 * 76 stack variables, 26 constants, and 26 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cb.h"
Chris@42 36
/*
 * r2cb_13 (HAVE_FMA variant): generated straight-line kernel expressed
 * with the FMA / FNMS / FMS macros.
 *
 * Per iteration it reads 13 inputs -- Cr[0], Cr[WS(csr, 1..6)] and
 * Ci[WS(csi, 1..6)] (Ci[0] is never read) -- and writes 13 outputs --
 * R0[0], R0[WS(rs, 1..6)], R1[0] and R1[WS(rs, 1..5)].
 *
 * The loop body runs v times (not at all when v <= 0); after each pass
 * R0 and R1 advance by ovs while Cr and Ci advance by ivs.
 *
 * NOTE(review): judging by the generator flags (-n 13 -sign 1) and the
 * function name, this is presumably the size-13 backward complex-to-real
 * DFT kernel -- confirm against the FFTW/genfft documentation.
 */
Chris@42 37 static void r2cb_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Named numeric constants. DK is a macro defined outside this file;
 * presumably it declares a constant of the kernel's scalar type -- confirm. */
Chris@42 39 DK(KP968287244, +0.968287244361984016049539446938120421179794516);
Chris@42 40 DK(KP875502302, +0.875502302409147941146295545768755143177842006);
Chris@42 41 DK(KP1_150281458, +1.150281458948006242736771094910906776922003215);
Chris@42 42 DK(KP1_040057143, +1.040057143777729238234261000998465604986476278);
Chris@42 43 DK(KP1_200954543, +1.200954543865330565851538506669526018704025697);
Chris@42 44 DK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@42 45 DK(KP600925212, +0.600925212577331548853203544578415991041882762);
Chris@42 46 DK(KP1_033041561, +1.033041561246979445681802577138034271410067244);
Chris@42 47 DK(KP1_007074065, +1.007074065727533254493747707736933954186697125);
Chris@42 48 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@42 49 DK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@42 50 DK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@42 51 DK(KP166666666, +0.166666666666666666666666666666666666666666667);
Chris@42 52 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 53 DK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@42 54 DK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@42 55 DK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@42 56 DK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@42 57 DK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@42 58 DK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@42 59 DK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@42 60 DK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@42 61 DK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@42 62 DK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@42 63 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@42 64 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 65 {
Chris@42 66 INT i;
/* One pass per transform. MAKE_VOLATILE_STRIDE is a macro defined
 * elsewhere; it is invoked on each of the three strides in the loop
 * increment expression. */
Chris@42 67 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) {
Chris@42 68 E TW, T14, TS, TO, T18, T1e, TY, TX, TQ, Tq, TP, Tl, T1d, Tr;
Chris@42 69 {
Chris@42 70 E T1, TN, T16, TJ, TV, TG, TU, Tf, T2, T3, Tb, Ti, T4;
Chris@42 71 {
Chris@42 72 E Ts, TB, Tx, Ty, Tv, TE, Tt, Tu, Tz, TC;
/* Load the six Ci inputs. TN, T16, TJ, TV, TG and TU computed below
 * depend only on these Ci values; the Cr loads are interleaved. */
Chris@42 73 Ts = Ci[WS(csi, 5)];
Chris@42 74 Tt = Ci[WS(csi, 2)];
Chris@42 75 Tu = Ci[WS(csi, 6)];
Chris@42 76 TB = Ci[WS(csi, 1)];
Chris@42 77 Tx = Ci[WS(csi, 3)];
Chris@42 78 Ty = Ci[WS(csi, 4)];
Chris@42 79 Tv = Tt + Tu;
Chris@42 80 TE = Tu - Tt;
Chris@42 81 T1 = Cr[0];
Chris@42 82 Tz = Tx + Ty;
Chris@42 83 TC = Tx - Ty;
Chris@42 84 {
Chris@42 85 E TL, Tw, T7, Ta;
Chris@42 86 TL = Ts + Tv;
Chris@42 87 Tw = FNMS(KP500000000, Tv, Ts);
Chris@42 88 T7 = Cr[WS(csr, 5)];
Chris@42 89 {
Chris@42 90 E TD, TM, TA, TH;
Chris@42 91 TD = FNMS(KP500000000, TC, TB);
Chris@42 92 TM = TB + TC;
Chris@42 93 TA = FMA(KP866025403, Tz, Tw);
Chris@42 94 TH = FNMS(KP866025403, Tz, Tw);
Chris@42 95 TN = FMA(KP302775637, TM, TL);
Chris@42 96 T16 = FNMS(KP302775637, TL, TM);
Chris@42 97 {
Chris@42 98 E TF, TI, T8, T9;
Chris@42 99 TF = FMA(KP866025403, TE, TD);
Chris@42 100 TI = FNMS(KP866025403, TE, TD);
Chris@42 101 T8 = Cr[WS(csr, 2)];
Chris@42 102 T9 = Cr[WS(csr, 6)];
Chris@42 103 TJ = FNMS(KP612264650, TI, TH);
Chris@42 104 TV = FMA(KP612264650, TH, TI);
Chris@42 105 TG = FNMS(KP038632954, TF, TA);
Chris@42 106 TU = FMA(KP038632954, TA, TF);
Chris@42 107 Tf = T8 - T9;
Chris@42 108 Ta = T8 + T9;
Chris@42 109 }
Chris@42 110 }
Chris@42 111 T2 = Cr[WS(csr, 1)];
Chris@42 112 T3 = Cr[WS(csr, 3)];
Chris@42 113 Tb = T7 + Ta;
Chris@42 114 Ti = FMS(KP500000000, Ta, T7);
Chris@42 115 T4 = Cr[WS(csr, 4)];
Chris@42 116 }
Chris@42 117 }
Chris@42 118 {
Chris@42 119 E T17, TK, T5, Te, Tk, Td;
Chris@42 120 TW = FMA(KP853480001, TV, TU);
Chris@42 121 T17 = FNMS(KP853480001, TV, TU);
Chris@42 122 TK = FNMS(KP853480001, TJ, TG);
Chris@42 123 T14 = FMA(KP853480001, TJ, TG);
Chris@42 124 T5 = T3 + T4;
Chris@42 125 Te = T3 - T4;
Chris@42 126 {
Chris@42 127 E Tn, Tg, Th, T6;
Chris@42 128 TS = FNMS(KP522026385, TK, TN);
Chris@42 129 TO = FMA(KP957805992, TN, TK);
Chris@42 130 Tn = Te - Tf;
Chris@42 131 Tg = Te + Tf;
Chris@42 132 Th = FNMS(KP500000000, T5, T2);
Chris@42 133 T6 = T2 + T5;
Chris@42 134 T18 = FNMS(KP522026385, T17, T16);
Chris@42 135 T1e = FMA(KP957805992, T16, T17);
Chris@42 136 {
Chris@42 137 E Tm, Tj, Tc, Tp, To;
Chris@42 138 Tm = Th + Ti;
Chris@42 139 Tj = Th - Ti;
Chris@42 140 Tc = T6 + Tb;
Chris@42 141 Tp = T6 - Tb;
Chris@42 142 To = FNMS(KP514918778, Tn, Tm);
Chris@42 143 TY = FMA(KP686558370, Tm, Tn);
Chris@42 144 TX = FNMS(KP226109445, Tg, Tj);
Chris@42 145 Tk = FMA(KP301479260, Tj, Tg);
/* First output: built only from T1 = Cr[0] and Tc, where Tc is the sum
 * of Cr[WS(csr, 1)] through Cr[WS(csr, 6)]. */
Chris@42 146 R0[0] = FMA(KP2_000000000, Tc, T1);
Chris@42 147 Td = FNMS(KP166666666, Tc, T1);
Chris@42 148 TQ = FNMS(KP859542535, To, Tp);
Chris@42 149 Tq = FMA(KP581704778, Tp, To);
Chris@42 150 }
Chris@42 151 }
Chris@42 152 TP = FNMS(KP503537032, Tk, Td);
Chris@42 153 Tl = FMA(KP1_007074065, Tk, Td);
Chris@42 154 }
Chris@42 155 }
Chris@42 156 T1d = FNMS(KP1_033041561, Tq, Tl);
Chris@42 157 Tr = FMA(KP1_033041561, Tq, Tl);
Chris@42 158 {
Chris@42 159 E T13, TR, T19, TZ;
Chris@42 160 T13 = FNMS(KP600925212, TQ, TP);
Chris@42 161 TR = FMA(KP600925212, TQ, TP);
Chris@42 162 T19 = FMA(KP769338817, TY, TX);
Chris@42 163 TZ = FNMS(KP769338817, TY, TX);
/* The remaining twelve outputs are stored as FMA/FNMS pairs; each pair
 * shares the same constant and the same two operands. */
Chris@42 164 R0[WS(rs, 4)] = FMA(KP1_200954543, T1e, T1d);
Chris@42 165 R1[WS(rs, 2)] = FNMS(KP1_200954543, T1e, T1d);
Chris@42 166 R0[WS(rs, 6)] = FMA(KP1_200954543, TO, Tr);
Chris@42 167 R1[0] = FNMS(KP1_200954543, TO, Tr);
Chris@42 168 {
Chris@42 169 E T1b, T15, T11, TT;
Chris@42 170 T1b = FNMS(KP1_040057143, T14, T13);
Chris@42 171 T15 = FMA(KP1_040057143, T14, T13);
Chris@42 172 T11 = FMA(KP1_150281458, TS, TR);
Chris@42 173 TT = FNMS(KP1_150281458, TS, TR);
Chris@42 174 {
Chris@42 175 E T1c, T1a, T12, T10;
Chris@42 176 T1c = FMA(KP875502302, T19, T18);
Chris@42 177 T1a = FNMS(KP875502302, T19, T18);
Chris@42 178 T12 = FMA(KP968287244, TZ, TW);
Chris@42 179 T10 = FNMS(KP968287244, TZ, TW);
Chris@42 180 R1[WS(rs, 5)] = FMA(KP1_150281458, T1c, T1b);
Chris@42 181 R0[WS(rs, 3)] = FNMS(KP1_150281458, T1c, T1b);
Chris@42 182 R1[WS(rs, 3)] = FMA(KP1_150281458, T1a, T15);
Chris@42 183 R0[WS(rs, 1)] = FNMS(KP1_150281458, T1a, T15);
Chris@42 184 R0[WS(rs, 5)] = FMA(KP1_040057143, T12, T11);
Chris@42 185 R0[WS(rs, 2)] = FNMS(KP1_040057143, T12, T11);
Chris@42 186 R1[WS(rs, 4)] = FMA(KP1_040057143, T10, TT);
Chris@42 187 R1[WS(rs, 1)] = FNMS(KP1_040057143, T10, TT);
Chris@42 188 }
Chris@42 189 }
Chris@42 190 }
Chris@42 191 }
Chris@42 192 }
Chris@42 193 }
Chris@42 194
/* Descriptor handed to X(kr2c_register) together with the r2cb_13 kernel.
 * The values 18, 0, 58 match the operation counts in the generator's
 * header comment for this variant (18 additions, 0 multiplications,
 * 58 fused multiply/add). GENUS is not defined in this file; presumably
 * it comes from r2cb.h -- confirm. */
Chris@42 195 static const kr2c_desc desc = { 13, "r2cb_13", {18, 0, 58, 0}, &GENUS };
Chris@42 196
/* Registration entry point (HAVE_FMA build): passes the r2cb_13 kernel
 * and its descriptor to X(kr2c_register) for planner p. */
Chris@42 197 void X(codelet_r2cb_13) (planner *p) {
Chris@42 198 X(kr2c_register) (p, r2cb_13, &desc);
Chris@42 199 }
Chris@42 200
Chris@42 201 #else /* HAVE_FMA */
Chris@42 202
Chris@42 203 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 13 -name r2cb_13 -include r2cb.h */
Chris@42 204
Chris@42 205 /*
Chris@42 206 * This function contains 76 FP additions, 35 FP multiplications,
Chris@42 207 * (or, 56 additions, 15 multiplications, 20 fused multiply/add),
Chris@42 208 * 56 stack variables, 19 constants, and 26 memory accesses
Chris@42 209 */
Chris@42 210 #include "r2cb.h"
Chris@42 211
/*
 * r2cb_13 (variant compiled when HAVE_FMA is not defined): generated
 * straight-line kernel that mixes plain multiplies/adds with the
 * FMA / FNMS / FMS macros.
 *
 * Per iteration it reads 13 inputs -- Cr[0], Cr[WS(csr, 1..6)] and
 * Ci[WS(csi, 1..6)] (Ci[0] is never read) -- and writes 13 outputs --
 * R0[0], R0[WS(rs, 1..6)], R1[0] and R1[WS(rs, 1..5)].
 *
 * The loop body runs v times (not at all when v <= 0); after each pass
 * R0 and R1 advance by ovs while Cr and Ci advance by ivs.
 *
 * NOTE(review): judging by the generator flags (-n 13 -sign 1) and the
 * function name, this is presumably the size-13 backward complex-to-real
 * DFT kernel -- confirm against the FFTW/genfft documentation.
 */
Chris@42 212 static void r2cb_13(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 213 {
/* Named numeric constants. DK is a macro defined outside this file;
 * presumably it declares a constant of the kernel's scalar type -- confirm. */
Chris@42 214 DK(KP1_007074065, +1.007074065727533254493747707736933954186697125);
Chris@42 215 DK(KP227708958, +0.227708958111581597949308691735310621069285120);
Chris@42 216 DK(KP531932498, +0.531932498429674575175042127684371897596660533);
Chris@42 217 DK(KP774781170, +0.774781170935234584261351932853525703557550433);
Chris@42 218 DK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@42 219 DK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@42 220 DK(KP151805972, +0.151805972074387731966205794490207080712856746);
Chris@42 221 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@42 222 DK(KP166666666, +0.166666666666666666666666666666666666666666667);
Chris@42 223 DK(KP600925212, +0.600925212577331548853203544578415991041882762);
Chris@42 224 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 225 DK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@42 226 DK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@42 227 DK(KP348277202, +0.348277202304271810011321589858529485233929352);
Chris@42 228 DK(KP1_150281458, +1.150281458948006242736771094910906776922003215);
Chris@42 229 DK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@42 230 DK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@42 231 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@42 232 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 233 {
Chris@42 234 INT i;
/* One pass per transform. MAKE_VOLATILE_STRIDE is a macro defined
 * elsewhere; it is invoked on each of the three strides in the loop
 * increment expression. */
Chris@42 235 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(52, rs), MAKE_VOLATILE_STRIDE(52, csr), MAKE_VOLATILE_STRIDE(52, csi)) {
Chris@42 236 E TG, TS, TR, T15, TJ, TT, T1, Tm, Tc, Td, Tg, Tj, Tk, Tn, To;
Chris@42 237 E Tp;
/* First stage: everything in this block is computed from the six Ci
 * inputs only and produces TG, TS, TR, T15, TJ and TT. */
Chris@42 238 {
Chris@42 239 E Ts, Tv, Tw, TE, TC, TB, Tz, TD, TA, TF;
Chris@42 240 {
Chris@42 241 E Tt, Tu, Tx, Ty;
Chris@42 242 Ts = Ci[WS(csi, 1)];
Chris@42 243 Tt = Ci[WS(csi, 3)];
Chris@42 244 Tu = Ci[WS(csi, 4)];
Chris@42 245 Tv = Tt - Tu;
Chris@42 246 Tw = FMS(KP2_000000000, Ts, Tv);
Chris@42 247 TE = KP1_732050807 * (Tt + Tu);
Chris@42 248 TC = Ci[WS(csi, 5)];
Chris@42 249 Tx = Ci[WS(csi, 6)];
Chris@42 250 Ty = Ci[WS(csi, 2)];
Chris@42 251 TB = Tx + Ty;
Chris@42 252 Tz = KP1_732050807 * (Tx - Ty);
Chris@42 253 TD = FNMS(KP2_000000000, TC, TB);
Chris@42 254 }
Chris@42 255 TA = Tw + Tz;
Chris@42 256 TF = TD - TE;
Chris@42 257 TG = FMA(KP011599105, TA, KP300238635 * TF);
Chris@42 258 TS = FNMS(KP011599105, TF, KP300238635 * TA);
Chris@42 259 {
Chris@42 260 E TP, TQ, TH, TI;
Chris@42 261 TP = Ts + Tv;
Chris@42 262 TQ = TB + TC;
Chris@42 263 TR = FNMS(KP348277202, TQ, KP1_150281458 * TP);
Chris@42 264 T15 = FMA(KP348277202, TP, KP1_150281458 * TQ);
Chris@42 265 TH = Tw - Tz;
Chris@42 266 TI = TE + TD;
Chris@42 267 TJ = FMA(KP156891391, TH, KP256247671 * TI);
Chris@42 268 TT = FNMS(KP256247671, TH, KP156891391 * TI);
Chris@42 269 }
Chris@42 270 }
/* Second stage: everything in this block is computed from the seven Cr
 * inputs only and produces T1, Tm, Tc, Td, Tg, Tj, Tk, Tn, To and Tp. */
Chris@42 271 {
Chris@42 272 E Tb, Ti, Tf, T6, Th, Te;
Chris@42 273 T1 = Cr[0];
Chris@42 274 {
Chris@42 275 E T7, T8, T9, Ta;
Chris@42 276 T7 = Cr[WS(csr, 5)];
Chris@42 277 T8 = Cr[WS(csr, 2)];
Chris@42 278 T9 = Cr[WS(csr, 6)];
Chris@42 279 Ta = T8 + T9;
Chris@42 280 Tb = T7 + Ta;
Chris@42 281 Ti = FNMS(KP500000000, Ta, T7);
Chris@42 282 Tf = T8 - T9;
Chris@42 283 }
Chris@42 284 {
Chris@42 285 E T2, T3, T4, T5;
Chris@42 286 T2 = Cr[WS(csr, 1)];
Chris@42 287 T3 = Cr[WS(csr, 3)];
Chris@42 288 T4 = Cr[WS(csr, 4)];
Chris@42 289 T5 = T3 + T4;
Chris@42 290 T6 = T2 + T5;
Chris@42 291 Th = FNMS(KP500000000, T5, T2);
Chris@42 292 Te = T3 - T4;
Chris@42 293 }
Chris@42 294 Tm = KP600925212 * (T6 - Tb);
Chris@42 295 Tc = T6 + Tb;
Chris@42 296 Td = FNMS(KP166666666, Tc, T1);
Chris@42 297 Tg = Te + Tf;
Chris@42 298 Tj = Th + Ti;
Chris@42 299 Tk = FMA(KP503537032, Tg, KP151805972 * Tj);
Chris@42 300 Tn = Th - Ti;
Chris@42 301 To = Te - Tf;
Chris@42 302 Tp = FNMS(KP265966249, To, KP516520780 * Tn);
Chris@42 303 }
/* First output: built only from T1 = Cr[0] and Tc, where Tc is the sum
 * of Cr[WS(csr, 1)] through Cr[WS(csr, 6)]. */
Chris@42 304 R0[0] = FMA(KP2_000000000, Tc, T1);
Chris@42 305 {
Chris@42 306 E TK, T1b, TV, T12, T16, T18, TO, T1a, Tr, T17, T11, T13;
Chris@42 307 {
Chris@42 308 E TU, T14, TM, TN;
Chris@42 309 TK = KP1_732050807 * (TG + TJ);
Chris@42 310 T1b = KP1_732050807 * (TS - TT);
Chris@42 311 TU = TS + TT;
Chris@42 312 TV = TR - TU;
Chris@42 313 T12 = FMA(KP2_000000000, TU, TR);
Chris@42 314 T14 = TG - TJ;
Chris@42 315 T16 = FMS(KP2_000000000, T14, T15);
Chris@42 316 T18 = T14 + T15;
Chris@42 317 TM = FMA(KP774781170, To, KP531932498 * Tn);
Chris@42 318 TN = FNMS(KP1_007074065, Tj, KP227708958 * Tg);
Chris@42 319 TO = TM - TN;
Chris@42 320 T1a = TM + TN;
Chris@42 321 {
Chris@42 322 E Tl, Tq, TZ, T10;
Chris@42 323 Tl = Td - Tk;
Chris@42 324 Tq = Tm - Tp;
Chris@42 325 Tr = Tl - Tq;
Chris@42 326 T17 = Tq + Tl;
Chris@42 327 TZ = FMA(KP2_000000000, Tk, Td);
Chris@42 328 T10 = FMA(KP2_000000000, Tp, Tm);
Chris@42 329 T11 = TZ - T10;
Chris@42 330 T13 = T10 + TZ;
Chris@42 331 }
Chris@42 332 }
/* The remaining twelve outputs are stored as sum/difference pairs of
 * the same two intermediate values. */
Chris@42 333 R1[WS(rs, 2)] = T11 - T12;
Chris@42 334 R0[WS(rs, 6)] = T13 - T16;
Chris@42 335 R1[0] = T13 + T16;
Chris@42 336 R0[WS(rs, 4)] = T11 + T12;
Chris@42 337 {
Chris@42 338 E TL, TW, T19, T1c;
Chris@42 339 TL = Tr - TK;
Chris@42 340 TW = TO - TV;
Chris@42 341 R1[WS(rs, 3)] = TL - TW;
Chris@42 342 R0[WS(rs, 1)] = TL + TW;
Chris@42 343 T19 = T17 - T18;
Chris@42 344 T1c = T1a + T1b;
Chris@42 345 R1[WS(rs, 1)] = T19 - T1c;
Chris@42 346 R1[WS(rs, 4)] = T1c + T19;
Chris@42 347 }
Chris@42 348 {
Chris@42 349 E T1d, T1e, TX, TY;
Chris@42 350 T1d = T1a - T1b;
Chris@42 351 T1e = T17 + T18;
Chris@42 352 R0[WS(rs, 2)] = T1d + T1e;
Chris@42 353 R0[WS(rs, 5)] = T1e - T1d;
Chris@42 354 TX = Tr + TK;
Chris@42 355 TY = TO + TV;
Chris@42 356 R0[WS(rs, 3)] = TX - TY;
Chris@42 357 R1[WS(rs, 5)] = TX + TY;
Chris@42 358 }
Chris@42 359 }
Chris@42 360 }
Chris@42 361 }
Chris@42 362 }
Chris@42 363
/* Descriptor handed to X(kr2c_register) together with the r2cb_13 kernel.
 * The values 56, 15, 20 match the operation counts in the generator's
 * header comment for this variant (56 additions, 15 multiplications,
 * 20 fused multiply/add). GENUS is not defined in this file; presumably
 * it comes from r2cb.h -- confirm. */
Chris@42 364 static const kr2c_desc desc = { 13, "r2cb_13", {56, 15, 20, 0}, &GENUS };
Chris@42 365
/* Registration entry point (build without HAVE_FMA): passes the r2cb_13
 * kernel and its descriptor to X(kr2c_register) for planner p. */
Chris@42 366 void X(codelet_r2cb_13) (planner *p) {
Chris@42 367 X(kr2c_register) (p, r2cb_13, &desc);
Chris@42 368 }
Chris@42 369
Chris@42 370 #endif /* HAVE_FMA */