annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cb_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:49:35 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cb_25 -include r2cb.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 152 FP additions, 120 FP multiplications,
Chris@42 32 * (or, 32 additions, 0 multiplications, 120 fused multiply/add),
Chris@42 33 * 115 stack variables, 44 constants, and 50 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cb.h"
Chris@42 36
/*
 * r2cb_25, HAVE_FMA variant: straight-line size-25 kernel emitted by genfft
 * (gen_r2cb, "-n 25 -sign 1 -fma"); only compiled when HAVE_FMA is defined.
 *
 * Loads  (25): Cr[0], Cr[WS(csr, k)] for k = 1..12, and Ci[WS(csi, k)] for
 *              k = 1..12.  Ci[0] is never read.
 * Stores (25): R0[0], R0[WS(rs, k)] for k = 1..12, R1[0], and
 *              R1[WS(rs, k)] for k = 1..11.
 * Batching:    the body runs once per unit of v (not at all when v <= 0);
 *              after each pass R0 and R1 advance by ovs, Cr and Ci by ivs.
 *
 * NOTE(review): going by FFTW naming, this is presumably the backward
 * (halfcomplex-to-real) transform, with Cr/Ci the real/imaginary input parts
 * and R0/R1 the even/odd-indexed real outputs -- confirm against r2cb.h.
 * NOTE(review): E, DK, FMA/FMS/FNMS, WS and MAKE_VOLATILE_STRIDE are macros
 * defined outside this file; presumably FMA(a,b,c) = a*b + c,
 * FMS(a,b,c) = a*b - c and FNMS(a,b,c) = c - a*b -- verify in the headers.
 */
Chris@42 37 static void r2cb_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Constant table: each KP name spells out the leading decimal digits of its value. */
Chris@42 39 DK(KP979740652, +0.979740652857618686258237536568998933733477632);
Chris@42 40 DK(KP438153340, +0.438153340021931793654057951961031291699532119);
Chris@42 41 DK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@42 42 DK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@42 43 DK(KP1_606007150, +1.606007150877320829666881187140752009270929701);
Chris@42 44 DK(KP1_721083328, +1.721083328735889354196523361841037632825608373);
Chris@42 45 DK(KP1_011627398, +1.011627398597394192215998921771049272931807941);
Chris@42 46 DK(KP595480289, +0.595480289600000014706716770488118292997907308);
Chris@42 47 DK(KP641441904, +0.641441904830606407298806329068862424939687989);
Chris@42 48 DK(KP452413526, +0.452413526233009763856834323966348796985206956);
Chris@42 49 DK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@42 50 DK(KP933137358, +0.933137358350283770603023973254446451924190884);
Chris@42 51 DK(KP1_666834356, +1.666834356657377354817925100486477686277992119);
Chris@42 52 DK(KP1_842354653, +1.842354653930286640500894870830132058718564461);
Chris@42 53 DK(KP1_082908895, +1.082908895072625554092571180165639018104066379);
Chris@42 54 DK(KP662318342, +0.662318342759882818626911127577439236802190210);
Chris@42 55 DK(KP576710603, +0.576710603632765877371579268136471017090111488);
Chris@42 56 DK(KP484291580, +0.484291580564315559745084187732367906918006201);
Chris@42 57 DK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@42 58 DK(KP1_898359647, +1.898359647016882523151110931686726543423167685);
Chris@42 59 DK(KP1_386580726, +1.386580726567734802700860150804827247498955921);
Chris@42 60 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 61 DK(KP1_115827804, +1.115827804063668528375399296931134075984874304);
Chris@42 62 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@42 63 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@42 64 DK(KP499013364, +0.499013364214135780976168403431725276668452610);
Chris@42 65 DK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@42 66 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 67 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 68 DK(KP730409924, +0.730409924561256563751459444999838399157094302);
Chris@42 69 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@42 70 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@42 71 DK(KP451418159, +0.451418159099103183892477933432151804893354132);
Chris@42 72 DK(KP846146756, +0.846146756728608505452954290121135880883743802);
Chris@42 73 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@42 74 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@42 75 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@42 76 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 77 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 78 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 79 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@42 80 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 81 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 82 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 83 {
Chris@42 84 INT i;
/* One pass per transform in the batch; the increment clause also steps the four data pointers. */
Chris@42 85 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) {
/* Declared at loop scope because these values are consumed by the last five stores, after the nested block has closed. */
Chris@42 86 E T1H, T24, T22, T1W, T1Y, T1X, T1Z, T23;
Chris@42 87 {
Chris@42 88 E T1G, Tu, T5, T1F, Tr, Te, T2o, T1N, T2a, T1t, TR, T1K, T29, T1u, TG;
Chris@42 89 E TU, TT, Tn, T1d, T1Q, T2p, T1T, T12, T1P, T1a;
Chris@42 90 {
Chris@42 91 E T1, T2, T3, Ts, Tt;
Chris@42 92 Ts = Ci[WS(csi, 5)];
Chris@42 93 Tt = Ci[WS(csi, 10)];
Chris@42 94 T1 = Cr[0];
Chris@42 95 T2 = Cr[WS(csr, 5)];
Chris@42 96 T3 = Cr[WS(csr, 10)];
Chris@42 97 T1G = FMS(KP618033988, Ts, Tt);
Chris@42 98 Tu = FMA(KP618033988, Tt, Ts);
Chris@42 99 {
Chris@42 100 E Tx, Tw, T1M, TQ, TM, T1J, TF, TL;
Chris@42 101 {
Chris@42 102 E T6, TH, TO, TP, TB, TI, Td, TJ, TE, T4, Tq, TK;
Chris@42 103 T6 = Cr[WS(csr, 1)];
Chris@42 104 T4 = T2 + T3;
Chris@42 105 Tq = T2 - T3;
Chris@42 106 TH = Ci[WS(csi, 1)];
Chris@42 107 {
Chris@42 108 E Ta, T9, Tb, T7, T8, Tp;
Chris@42 109 T7 = Cr[WS(csr, 6)];
Chris@42 110 T8 = Cr[WS(csr, 4)];
Chris@42 111 Tp = FNMS(KP500000000, T4, T1);
Chris@42 112 T5 = FMA(KP2_000000000, T4, T1);
Chris@42 113 Ta = Cr[WS(csr, 11)];
Chris@42 114 TO = T7 - T8;
Chris@42 115 T9 = T7 + T8;
Chris@42 116 T1F = FNMS(KP1_118033988, Tq, Tp);
Chris@42 117 Tr = FMA(KP1_118033988, Tq, Tp);
Chris@42 118 Tb = Cr[WS(csr, 9)];
Chris@42 119 {
Chris@42 120 E TC, TD, Tz, TA, Tc;
Chris@42 121 Tz = Ci[WS(csi, 6)];
Chris@42 122 TA = Ci[WS(csi, 4)];
Chris@42 123 TP = Tb - Ta;
Chris@42 124 Tc = Ta + Tb;
Chris@42 125 TC = Ci[WS(csi, 11)];
Chris@42 126 TB = Tz + TA;
Chris@42 127 TI = Tz - TA;
Chris@42 128 TD = Ci[WS(csi, 9)];
Chris@42 129 Td = T9 + Tc;
Chris@42 130 Tx = T9 - Tc;
Chris@42 131 TJ = TC - TD;
Chris@42 132 TE = TC + TD;
Chris@42 133 }
Chris@42 134 }
Chris@42 135 Te = T6 + Td;
Chris@42 136 Tw = FNMS(KP250000000, Td, T6);
Chris@42 137 T1M = FMA(KP618033988, TO, TP);
Chris@42 138 TQ = FNMS(KP618033988, TP, TO);
Chris@42 139 TK = TI + TJ;
Chris@42 140 TM = TI - TJ;
Chris@42 141 T1J = FNMS(KP618033988, TB, TE);
Chris@42 142 TF = FMA(KP618033988, TE, TB);
Chris@42 143 TL = FNMS(KP250000000, TK, TH);
Chris@42 144 T2o = TK + TH;
Chris@42 145 }
Chris@42 146 {
Chris@42 147 E Tf, T14, T1b, T1c, Tm, TY, T15, T16, T11, T17, T19, T18;
Chris@42 148 Tf = Cr[WS(csr, 2)];
Chris@42 149 {
Chris@42 150 E T1L, TN, T1I, Ty;
Chris@42 151 T1L = FNMS(KP559016994, TM, TL);
Chris@42 152 TN = FMA(KP559016994, TM, TL);
Chris@42 153 T1I = FNMS(KP559016994, Tx, Tw);
Chris@42 154 Ty = FMA(KP559016994, Tx, Tw);
Chris@42 155 T1N = FMA(KP951056516, T1M, T1L);
Chris@42 156 T2a = FNMS(KP951056516, T1M, T1L);
Chris@42 157 T1t = FNMS(KP951056516, TQ, TN);
Chris@42 158 TR = FMA(KP951056516, TQ, TN);
Chris@42 159 T1K = FMA(KP951056516, T1J, T1I);
Chris@42 160 T29 = FNMS(KP951056516, T1J, T1I);
Chris@42 161 T1u = FMA(KP951056516, TF, Ty);
Chris@42 162 TG = FNMS(KP951056516, TF, Ty);
Chris@42 163 T14 = Ci[WS(csi, 2)];
Chris@42 164 }
Chris@42 165 {
Chris@42 166 E Tg, Th, Tj, Tk;
Chris@42 167 Tg = Cr[WS(csr, 7)];
Chris@42 168 Th = Cr[WS(csr, 3)];
Chris@42 169 Tj = Cr[WS(csr, 12)];
Chris@42 170 Tk = Cr[WS(csr, 8)];
Chris@42 171 {
Chris@42 172 E TW, Ti, Tl, TX, TZ, T10;
Chris@42 173 TW = Ci[WS(csi, 7)];
Chris@42 174 T1b = Th - Tg;
Chris@42 175 Ti = Tg + Th;
Chris@42 176 T1c = Tj - Tk;
Chris@42 177 Tl = Tj + Tk;
Chris@42 178 TX = Ci[WS(csi, 3)];
Chris@42 179 TZ = Ci[WS(csi, 12)];
Chris@42 180 T10 = Ci[WS(csi, 8)];
Chris@42 181 Tm = Ti + Tl;
Chris@42 182 TU = Tl - Ti;
Chris@42 183 TY = TW + TX;
Chris@42 184 T15 = TW - TX;
Chris@42 185 T16 = TZ - T10;
Chris@42 186 T11 = TZ + T10;
Chris@42 187 }
Chris@42 188 }
Chris@42 189 TT = FNMS(KP250000000, Tm, Tf);
Chris@42 190 Tn = Tf + Tm;
Chris@42 191 T17 = T15 + T16;
Chris@42 192 T19 = T16 - T15;
Chris@42 193 T1d = FNMS(KP618033988, T1c, T1b);
Chris@42 194 T1Q = FMA(KP618033988, T1b, T1c);
Chris@42 195 T18 = FNMS(KP250000000, T17, T14);
Chris@42 196 T2p = T17 + T14;
Chris@42 197 T1T = FNMS(KP618033988, TY, T11);
Chris@42 198 T12 = FMA(KP618033988, T11, TY);
Chris@42 199 T1P = FMA(KP559016994, T19, T18);
Chris@42 200 T1a = FNMS(KP559016994, T19, T18);
Chris@42 201 }
Chris@42 202 }
Chris@42 203 }
Chris@42 204 {
Chris@42 205 E T1R, T1e, T1q, T1U, T13, T1r, T2b, T28, T25, T2i, T2k;
Chris@42 206 {
Chris@42 207 E T2m, To, T26, T27, TV, T1S;
Chris@42 208 T2m = Te - Tn;
Chris@42 209 To = Te + Tn;
Chris@42 210 TV = FNMS(KP559016994, TU, TT);
Chris@42 211 T1S = FMA(KP559016994, TU, TT);
Chris@42 212 T26 = FMA(KP951056516, T1Q, T1P);
Chris@42 213 T1R = FNMS(KP951056516, T1Q, T1P);
Chris@42 214 T1e = FNMS(KP951056516, T1d, T1a);
Chris@42 215 T1q = FMA(KP951056516, T1d, T1a);
Chris@42 216 T27 = FNMS(KP951056516, T1T, T1S);
Chris@42 217 T1U = FMA(KP951056516, T1T, T1S);
Chris@42 218 T13 = FNMS(KP951056516, T12, TV);
Chris@42 219 T1r = FMA(KP951056516, T12, TV);
Chris@42 220 {
Chris@42 221 E T2g, T2q, T2s, T2h, T2n, T2r, T2l;
Chris@42 222 T2g = FMA(KP939062505, T29, T2a);
Chris@42 223 T2b = FNMS(KP939062505, T2a, T29);
Chris@42 224 R0[0] = FMA(KP2_000000000, To, T5);
Chris@42 225 T2l = FNMS(KP500000000, To, T5);
Chris@42 226 T2q = FMA(KP618033988, T2p, T2o);
Chris@42 227 T2s = FNMS(KP618033988, T2o, T2p);
Chris@42 228 T28 = FNMS(KP062914667, T27, T26);
Chris@42 229 T2h = FMA(KP062914667, T26, T27);
Chris@42 230 T2n = FMA(KP1_118033988, T2m, T2l);
Chris@42 231 T2r = FNMS(KP1_118033988, T2m, T2l);
Chris@42 232 T25 = FMA(KP1_902113032, T1G, T1F);
Chris@42 233 T1H = FNMS(KP1_902113032, T1G, T1F);
Chris@42 234 T2i = FMA(KP846146756, T2h, T2g);
Chris@42 235 T2k = FNMS(KP451418159, T2g, T2h);
Chris@42 236 R0[WS(rs, 10)] = FMA(KP1_902113032, T2q, T2n);
Chris@42 237 R1[WS(rs, 2)] = FNMS(KP1_902113032, T2q, T2n);
Chris@42 238 R0[WS(rs, 5)] = FMA(KP1_902113032, T2s, T2r);
Chris@42 239 R1[WS(rs, 7)] = FNMS(KP1_902113032, T2s, T2r);
Chris@42 240 }
Chris@42 241 }
Chris@42 242 {
Chris@42 243 E TS, T1f, T1p, Tv, T2e, T1o, T1m, T2d, T1k, T1l, T2c;
Chris@42 244 TS = FNMS(KP256756360, TR, TG);
Chris@42 245 T1k = FMA(KP256756360, TG, TR);
Chris@42 246 T1l = FMA(KP549754652, T13, T1e);
Chris@42 247 T1f = FNMS(KP549754652, T1e, T13);
Chris@42 248 T1p = FMA(KP1_902113032, Tu, Tr);
Chris@42 249 Tv = FNMS(KP1_902113032, Tu, Tr);
Chris@42 250 T2e = FMA(KP730409924, T2b, T28);
Chris@42 251 T2c = FNMS(KP730409924, T2b, T28);
Chris@42 252 T1o = FNMS(KP683113946, T1k, T1l);
Chris@42 253 T1m = FMA(KP559154169, T1l, T1k);
Chris@42 254 R1[WS(rs, 1)] = FNMS(KP1_996053456, T2c, T25);
Chris@42 255 T2d = FMA(KP499013364, T2c, T25);
Chris@42 256 {
Chris@42 257 E T1C, T1E, T1y, T1w;
Chris@42 258 {
Chris@42 259 E T1s, T1v, T1i, T1h, T1n, T1j;
Chris@42 260 {
Chris@42 261 E T1A, T1B, T2f, T2j, T1g;
Chris@42 262 T1A = FNMS(KP470564281, T1q, T1r);
Chris@42 263 T1s = FMA(KP470564281, T1r, T1q);
Chris@42 264 T1v = FNMS(KP634619297, T1u, T1t);
Chris@42 265 T1B = FMA(KP634619297, T1t, T1u);
Chris@42 266 T2f = FMA(KP1_115827804, T2e, T2d);
Chris@42 267 T2j = FNMS(KP1_115827804, T2e, T2d);
Chris@42 268 T1i = FNMS(KP904730450, T1f, TS);
Chris@42 269 T1g = FMA(KP904730450, T1f, TS);
Chris@42 270 R1[WS(rs, 11)] = FMA(KP1_386580726, T2i, T2f);
Chris@42 271 R0[WS(rs, 4)] = FNMS(KP1_386580726, T2i, T2f);
Chris@42 272 R1[WS(rs, 6)] = FMA(KP1_898359647, T2k, T2j);
Chris@42 273 R0[WS(rs, 9)] = FNMS(KP1_898359647, T2k, T2j);
Chris@42 274 R1[0] = FMA(KP1_937166322, T1g, Tv);
Chris@42 275 T1h = FNMS(KP484291580, T1g, Tv);
Chris@42 276 T1C = FNMS(KP576710603, T1B, T1A);
Chris@42 277 T1E = FMA(KP662318342, T1A, T1B);
Chris@42 278 }
Chris@42 279 T1n = FNMS(KP1_082908895, T1i, T1h);
Chris@42 280 T1j = FMA(KP1_082908895, T1i, T1h);
Chris@42 281 R1[WS(rs, 10)] = FMA(KP1_842354653, T1m, T1j);
Chris@42 282 R0[WS(rs, 3)] = FNMS(KP1_842354653, T1m, T1j);
Chris@42 283 R1[WS(rs, 5)] = FMA(KP1_666834356, T1o, T1n);
Chris@42 284 R0[WS(rs, 8)] = FNMS(KP1_666834356, T1o, T1n);
Chris@42 285 T1y = FNMS(KP933137358, T1v, T1s);
Chris@42 286 T1w = FMA(KP933137358, T1v, T1s);
Chris@42 287 }
Chris@42 288 {
Chris@42 289 E T1O, T20, T21, T1V, T1x, T1z, T1D;
Chris@42 290 T1O = FNMS(KP549754652, T1N, T1K);
Chris@42 291 T20 = FMA(KP549754652, T1K, T1N);
Chris@42 292 T21 = FMA(KP634619297, T1R, T1U);
Chris@42 293 T1V = FNMS(KP634619297, T1U, T1R);
Chris@42 294 R0[WS(rs, 2)] = FNMS(KP1_809654104, T1w, T1p);
Chris@42 295 T1x = FMA(KP452413526, T1w, T1p);
Chris@42 296 T24 = FNMS(KP641441904, T20, T21);
Chris@42 297 T22 = FMA(KP595480289, T21, T20);
Chris@42 298 T1z = FNMS(KP1_011627398, T1y, T1x);
Chris@42 299 T1D = FMA(KP1_011627398, T1y, T1x);
Chris@42 300 R1[WS(rs, 9)] = FNMS(KP1_721083328, T1C, T1z);
Chris@42 301 R0[WS(rs, 7)] = FMA(KP1_721083328, T1C, T1z);
Chris@42 302 R0[WS(rs, 12)] = FMA(KP1_606007150, T1E, T1D);
Chris@42 303 R1[WS(rs, 4)] = FNMS(KP1_606007150, T1E, T1D);
Chris@42 304 T1W = FNMS(KP963507348, T1V, T1O);
Chris@42 305 T1Y = FMA(KP963507348, T1V, T1O);
Chris@42 306 }
Chris@42 307 }
Chris@42 308 }
Chris@42 309 }
Chris@42 310 }
/* Remaining five outputs, built from the loop-scope temporaries filled in above. */
Chris@42 311 R0[WS(rs, 1)] = FMA(KP1_752613360, T1W, T1H);
Chris@42 312 T1X = FNMS(KP438153340, T1W, T1H);
Chris@42 313 T1Z = FMA(KP979740652, T1Y, T1X);
Chris@42 314 T23 = FNMS(KP979740652, T1Y, T1X);
Chris@42 315 R0[WS(rs, 11)] = FMA(KP1_666834356, T22, T1Z);
Chris@42 316 R1[WS(rs, 3)] = FNMS(KP1_666834356, T22, T1Z);
Chris@42 317 R1[WS(rs, 8)] = FNMS(KP1_606007150, T24, T23);
Chris@42 318 R0[WS(rs, 6)] = FMA(KP1_606007150, T24, T23);
Chris@42 319 }
Chris@42 320 }
Chris@42 321 }
Chris@42 322
/*
 * Descriptor passed to X(kr2c_register) for the HAVE_FMA kernel: 25 and
 * "r2cb_25" match the generator's -n / -name arguments, and {32, 0, 120, ...}
 * repeats the add / multiply / fused-multiply-add counts quoted in this
 * variant's operation-count comment.
 * NOTE(review): the meaning of the fourth count (0) and of GENUS is defined
 * outside this file (presumably r2cb.h) -- confirm there.
 */
Chris@42 323 static const kr2c_desc desc = { 25, "r2cb_25", {32, 0, 120, 0}, &GENUS };
Chris@42 324
/*
 * Registration entry point (HAVE_FMA build): hands the r2cb_25 kernel and its
 * descriptor to planner p through X(kr2c_register).
 * NOTE(review): X(...) is a macro defined elsewhere, presumably adding the
 * library's symbol prefix -- confirm in the FFTW headers.
 */
Chris@42 325 void X(codelet_r2cb_25) (planner *p) {
Chris@42 326 X(kr2c_register) (p, r2cb_25, &desc);
Chris@42 327 }
Chris@42 328
Chris@42 329 #else /* HAVE_FMA */
Chris@42 330
Chris@42 331 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cb_25 -include r2cb.h */
Chris@42 332
Chris@42 333 /*
Chris@42 334 * This function contains 152 FP additions, 98 FP multiplications,
Chris@42 335 * (or, 100 additions, 46 multiplications, 52 fused multiply/add),
Chris@42 336 * 65 stack variables, 21 constants, and 50 memory accesses
Chris@42 337 */
Chris@42 338 #include "r2cb.h"
Chris@42 339
/*
 * r2cb_25, non-FMA variant: straight-line size-25 kernel emitted by genfft
 * (gen_r2cb, "-n 25 -sign 1"); compiled when HAVE_FMA is not defined.
 *
 * Loads  (25): Cr[0], Cr[WS(csr, k)] for k = 1..12, and Ci[WS(csi, k)] for
 *              k = 1..12.  Ci[0] is never read.
 * Stores (25): R0[0], R0[WS(rs, k)] for k = 1..12, R1[0], and
 *              R1[WS(rs, k)] for k = 1..11.
 * Batching:    the body runs once per unit of v (not at all when v <= 0);
 *              after each pass R0 and R1 advance by ovs, Cr and Ci by ivs.
 *
 * NOTE(review): going by FFTW naming, this is presumably the backward
 * (halfcomplex-to-real) transform, with Cr/Ci the real/imaginary input parts
 * and R0/R1 the even/odd-indexed real outputs -- confirm against r2cb.h.
 * NOTE(review): E, DK, FMA/FNMS, WS and MAKE_VOLATILE_STRIDE are macros
 * defined outside this file; presumably FMA(a,b,c) = a*b + c and
 * FNMS(a,b,c) = c - a*b -- verify in the headers.
 */
Chris@42 340 static void r2cb_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 341 {
/* Constant table: each KP name spells out the leading decimal digits of its value. */
Chris@42 342 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 343 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 344 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 345 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 346 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 347 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 348 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 349 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 350 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 351 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 352 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 353 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 354 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 355 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 356 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 357 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 358 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 359 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 360 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@42 361 DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
Chris@42 362 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@42 363 {
Chris@42 364 INT i;
/* One pass per transform in the batch; the increment clause also steps the four data pointers. */
Chris@42 365 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) {
Chris@42 366 E Tu, T1G, T5, Tr, T1F, TN, TO, Te, TR, T27, T1r, T1N, TG, T26, T1q;
Chris@42 367 E T1K, T1a, T1b, Tn, T1e, T2a, T1u, T1U, T13, T29, T1t, T1R, Ts, Tt;
/* Input stage, part 1: Ci at offsets 5 and 10, then Cr at offsets 0, 5 and 10. */
Chris@42 368 Ts = Ci[WS(csi, 5)];
Chris@42 369 Tt = Ci[WS(csi, 10)];
Chris@42 370 Tu = FMA(KP1_902113032, Ts, KP1_175570504 * Tt);
Chris@42 371 T1G = FNMS(KP1_902113032, Tt, KP1_175570504 * Ts);
Chris@42 372 {
Chris@42 373 E T1, T4, Tp, T2, T3, Tq;
Chris@42 374 T1 = Cr[0];
Chris@42 375 T2 = Cr[WS(csr, 5)];
Chris@42 376 T3 = Cr[WS(csr, 10)];
Chris@42 377 T4 = T2 + T3;
Chris@42 378 Tp = KP1_118033988 * (T2 - T3);
Chris@42 379 T5 = FMA(KP2_000000000, T4, T1);
Chris@42 380 Tq = FNMS(KP500000000, T4, T1);
Chris@42 381 Tr = Tp + Tq;
Chris@42 382 T1F = Tq - Tp;
Chris@42 383 }
/* Input stage, part 2: Cr and Ci at offsets 1, 6, 4, 11 and 9. */
Chris@42 384 {
Chris@42 385 E T6, Td, TI, Tw, TH, TB, TE, TM;
Chris@42 386 T6 = Cr[WS(csr, 1)];
Chris@42 387 TN = Ci[WS(csi, 1)];
Chris@42 388 {
Chris@42 389 E T7, T8, T9, Ta, Tb, Tc;
Chris@42 390 T7 = Cr[WS(csr, 6)];
Chris@42 391 T8 = Cr[WS(csr, 4)];
Chris@42 392 T9 = T7 + T8;
Chris@42 393 Ta = Cr[WS(csr, 11)];
Chris@42 394 Tb = Cr[WS(csr, 9)];
Chris@42 395 Tc = Ta + Tb;
Chris@42 396 Td = T9 + Tc;
Chris@42 397 TI = Ta - Tb;
Chris@42 398 Tw = KP559016994 * (T9 - Tc);
Chris@42 399 TH = T7 - T8;
Chris@42 400 }
Chris@42 401 {
Chris@42 402 E Tz, TA, TK, TC, TD, TL;
Chris@42 403 Tz = Ci[WS(csi, 6)];
Chris@42 404 TA = Ci[WS(csi, 4)];
Chris@42 405 TK = Tz - TA;
Chris@42 406 TC = Ci[WS(csi, 11)];
Chris@42 407 TD = Ci[WS(csi, 9)];
Chris@42 408 TL = TC - TD;
Chris@42 409 TB = Tz + TA;
Chris@42 410 TO = TK + TL;
Chris@42 411 TE = TC + TD;
Chris@42 412 TM = KP559016994 * (TK - TL);
Chris@42 413 }
Chris@42 414 Te = T6 + Td;
Chris@42 415 {
Chris@42 416 E TJ, T1L, TQ, T1M, TP;
Chris@42 417 TJ = FMA(KP951056516, TH, KP587785252 * TI);
Chris@42 418 T1L = FNMS(KP951056516, TI, KP587785252 * TH);
Chris@42 419 TP = FNMS(KP250000000, TO, TN);
Chris@42 420 TQ = TM + TP;
Chris@42 421 T1M = TP - TM;
Chris@42 422 TR = TJ + TQ;
Chris@42 423 T27 = T1M - T1L;
Chris@42 424 T1r = TQ - TJ;
Chris@42 425 T1N = T1L + T1M;
Chris@42 426 }
Chris@42 427 {
Chris@42 428 E TF, T1J, Ty, T1I, Tx;
Chris@42 429 TF = FMA(KP951056516, TB, KP587785252 * TE);
Chris@42 430 T1J = FNMS(KP951056516, TE, KP587785252 * TB);
Chris@42 431 Tx = FNMS(KP250000000, Td, T6);
Chris@42 432 Ty = Tw + Tx;
Chris@42 433 T1I = Tx - Tw;
Chris@42 434 TG = Ty - TF;
Chris@42 435 T26 = T1I + T1J;
Chris@42 436 T1q = Ty + TF;
Chris@42 437 T1K = T1I - T1J;
Chris@42 438 }
Chris@42 439 }
/* Input stage, part 3: Cr and Ci at offsets 2, 7, 3, 12 and 8. */
Chris@42 440 {
Chris@42 441 E Tf, Tm, T15, TT, T14, TY, T11, T19;
Chris@42 442 Tf = Cr[WS(csr, 2)];
Chris@42 443 T1a = Ci[WS(csi, 2)];
Chris@42 444 {
Chris@42 445 E Tg, Th, Ti, Tj, Tk, Tl;
Chris@42 446 Tg = Cr[WS(csr, 7)];
Chris@42 447 Th = Cr[WS(csr, 3)];
Chris@42 448 Ti = Tg + Th;
Chris@42 449 Tj = Cr[WS(csr, 12)];
Chris@42 450 Tk = Cr[WS(csr, 8)];
Chris@42 451 Tl = Tj + Tk;
Chris@42 452 Tm = Ti + Tl;
Chris@42 453 T15 = Tj - Tk;
Chris@42 454 TT = KP559016994 * (Ti - Tl);
Chris@42 455 T14 = Tg - Th;
Chris@42 456 }
Chris@42 457 {
Chris@42 458 E TW, TX, T17, TZ, T10, T18;
Chris@42 459 TW = Ci[WS(csi, 7)];
Chris@42 460 TX = Ci[WS(csi, 3)];
Chris@42 461 T17 = TW - TX;
Chris@42 462 TZ = Ci[WS(csi, 12)];
Chris@42 463 T10 = Ci[WS(csi, 8)];
Chris@42 464 T18 = TZ - T10;
Chris@42 465 TY = TW + TX;
Chris@42 466 T1b = T17 + T18;
Chris@42 467 T11 = TZ + T10;
Chris@42 468 T19 = KP559016994 * (T17 - T18);
Chris@42 469 }
Chris@42 470 Tn = Tf + Tm;
Chris@42 471 {
Chris@42 472 E T16, T1S, T1d, T1T, T1c;
Chris@42 473 T16 = FMA(KP951056516, T14, KP587785252 * T15);
Chris@42 474 T1S = FNMS(KP951056516, T15, KP587785252 * T14);
Chris@42 475 T1c = FNMS(KP250000000, T1b, T1a);
Chris@42 476 T1d = T19 + T1c;
Chris@42 477 T1T = T1c - T19;
Chris@42 478 T1e = T16 + T1d;
Chris@42 479 T2a = T1T - T1S;
Chris@42 480 T1u = T1d - T16;
Chris@42 481 T1U = T1S + T1T;
Chris@42 482 }
Chris@42 483 {
Chris@42 484 E T12, T1Q, TV, T1P, TU;
Chris@42 485 T12 = FMA(KP951056516, TY, KP587785252 * T11);
Chris@42 486 T1Q = FNMS(KP951056516, T11, KP587785252 * TY);
Chris@42 487 TU = FNMS(KP250000000, Tm, Tf);
Chris@42 488 TV = TT + TU;
Chris@42 489 T1P = TU - TT;
Chris@42 490 T13 = TV - T12;
Chris@42 491 T29 = T1P + T1Q;
Chris@42 492 T1t = TV + T12;
Chris@42 493 T1R = T1P - T1Q;
Chris@42 494 }
Chris@42 495 }
/* Output stage: each of the five brace blocks below stores five of the 25 results. */
Chris@42 496 {
Chris@42 497 E T2m, To, T2l, T2q, T2s, T2o, T2p, T2r, T2n;
Chris@42 498 T2m = KP1_118033988 * (Te - Tn);
Chris@42 499 To = Te + Tn;
Chris@42 500 T2l = FNMS(KP500000000, To, T5);
Chris@42 501 T2o = TO + TN;
Chris@42 502 T2p = T1b + T1a;
Chris@42 503 T2q = FNMS(KP1_902113032, T2p, KP1_175570504 * T2o);
Chris@42 504 T2s = FMA(KP1_902113032, T2o, KP1_175570504 * T2p);
Chris@42 505 R0[0] = FMA(KP2_000000000, To, T5);
Chris@42 506 T2r = T2m + T2l;
Chris@42 507 R1[WS(rs, 2)] = T2r - T2s;
Chris@42 508 R0[WS(rs, 10)] = T2r + T2s;
Chris@42 509 T2n = T2l - T2m;
Chris@42 510 R0[WS(rs, 5)] = T2n - T2q;
Chris@42 511 R1[WS(rs, 7)] = T2n + T2q;
Chris@42 512 }
Chris@42 513 {
Chris@42 514 E T2i, T2k, T25, T2c, T2d, T2e, T2j, T2f;
Chris@42 515 {
Chris@42 516 E T2g, T2h, T28, T2b;
Chris@42 517 T2g = FMA(KP684547105, T26, KP728968627 * T27);
Chris@42 518 T2h = FMA(KP998026728, T29, KP062790519 * T2a);
Chris@42 519 T2i = FNMS(KP1_902113032, T2h, KP1_175570504 * T2g);
Chris@42 520 T2k = FMA(KP1_902113032, T2g, KP1_175570504 * T2h);
Chris@42 521 T25 = T1F + T1G;
Chris@42 522 T28 = FNMS(KP684547105, T27, KP728968627 * T26);
Chris@42 523 T2b = FNMS(KP998026728, T2a, KP062790519 * T29);
Chris@42 524 T2c = T28 + T2b;
Chris@42 525 T2d = FNMS(KP500000000, T2c, T25);
Chris@42 526 T2e = KP1_118033988 * (T28 - T2b);
Chris@42 527 }
Chris@42 528 R1[WS(rs, 1)] = FMA(KP2_000000000, T2c, T25);
Chris@42 529 T2j = T2e + T2d;
Chris@42 530 R0[WS(rs, 4)] = T2j - T2k;
Chris@42 531 R1[WS(rs, 11)] = T2j + T2k;
Chris@42 532 T2f = T2d - T2e;
Chris@42 533 R1[WS(rs, 6)] = T2f - T2i;
Chris@42 534 R0[WS(rs, 9)] = T2f + T2i;
Chris@42 535 }
Chris@42 536 {
Chris@42 537 E T1m, T1o, Tv, T1g, T1h, T1i, T1n, T1j;
Chris@42 538 {
Chris@42 539 E T1k, T1l, TS, T1f;
Chris@42 540 T1k = FMA(KP248689887, TG, KP968583161 * TR);
Chris@42 541 T1l = FMA(KP481753674, T13, KP876306680 * T1e);
Chris@42 542 T1m = FNMS(KP1_902113032, T1l, KP1_175570504 * T1k);
Chris@42 543 T1o = FMA(KP1_902113032, T1k, KP1_175570504 * T1l);
Chris@42 544 Tv = Tr - Tu;
Chris@42 545 TS = FNMS(KP248689887, TR, KP968583161 * TG);
Chris@42 546 T1f = FNMS(KP481753674, T1e, KP876306680 * T13);
Chris@42 547 T1g = TS + T1f;
Chris@42 548 T1h = FNMS(KP500000000, T1g, Tv);
Chris@42 549 T1i = KP1_118033988 * (TS - T1f);
Chris@42 550 }
Chris@42 551 R1[0] = FMA(KP2_000000000, T1g, Tv);
Chris@42 552 T1n = T1i + T1h;
Chris@42 553 R0[WS(rs, 3)] = T1n - T1o;
Chris@42 554 R1[WS(rs, 10)] = T1n + T1o;
Chris@42 555 T1j = T1h - T1i;
Chris@42 556 R1[WS(rs, 5)] = T1j - T1m;
Chris@42 557 R0[WS(rs, 8)] = T1j + T1m;
Chris@42 558 }
Chris@42 559 {
Chris@42 560 E T1C, T1E, T1p, T1w, T1x, T1y, T1D, T1z;
Chris@42 561 {
Chris@42 562 E T1A, T1B, T1s, T1v;
Chris@42 563 T1A = FMA(KP844327925, T1q, KP535826794 * T1r);
Chris@42 564 T1B = FNMS(KP425779291, T1u, KP904827052 * T1t);
Chris@42 565 T1C = FNMS(KP1_902113032, T1B, KP1_175570504 * T1A);
Chris@42 566 T1E = FMA(KP1_902113032, T1A, KP1_175570504 * T1B);
Chris@42 567 T1p = Tr + Tu;
Chris@42 568 T1s = FNMS(KP844327925, T1r, KP535826794 * T1q);
Chris@42 569 T1v = FMA(KP425779291, T1t, KP904827052 * T1u);
Chris@42 570 T1w = T1s - T1v;
Chris@42 571 T1x = FNMS(KP500000000, T1w, T1p);
Chris@42 572 T1y = KP1_118033988 * (T1s + T1v);
Chris@42 573 }
Chris@42 574 R0[WS(rs, 2)] = FMA(KP2_000000000, T1w, T1p);
Chris@42 575 T1D = T1x + T1y;
Chris@42 576 R1[WS(rs, 4)] = T1D - T1E;
Chris@42 577 R0[WS(rs, 12)] = T1E + T1D;
Chris@42 578 T1z = T1x - T1y;
Chris@42 579 R0[WS(rs, 7)] = T1z - T1C;
Chris@42 580 R1[WS(rs, 9)] = T1C + T1z;
Chris@42 581 }
Chris@42 582 {
Chris@42 583 E T22, T24, T1H, T1W, T1X, T1Y, T23, T1Z;
Chris@42 584 {
Chris@42 585 E T20, T21, T1O, T1V;
Chris@42 586 T20 = FMA(KP481753674, T1K, KP876306680 * T1N);
Chris@42 587 T21 = FMA(KP844327925, T1R, KP535826794 * T1U);
Chris@42 588 T22 = FNMS(KP1_902113032, T21, KP1_175570504 * T20);
Chris@42 589 T24 = FMA(KP1_902113032, T20, KP1_175570504 * T21);
Chris@42 590 T1H = T1F - T1G;
Chris@42 591 T1O = FNMS(KP481753674, T1N, KP876306680 * T1K);
Chris@42 592 T1V = FNMS(KP844327925, T1U, KP535826794 * T1R);
Chris@42 593 T1W = T1O + T1V;
Chris@42 594 T1X = FNMS(KP500000000, T1W, T1H);
Chris@42 595 T1Y = KP1_118033988 * (T1O - T1V);
Chris@42 596 }
Chris@42 597 R0[WS(rs, 1)] = FMA(KP2_000000000, T1W, T1H);
Chris@42 598 T23 = T1Y + T1X;
Chris@42 599 R1[WS(rs, 3)] = T23 - T24;
Chris@42 600 R0[WS(rs, 11)] = T23 + T24;
Chris@42 601 T1Z = T1X - T1Y;
Chris@42 602 R0[WS(rs, 6)] = T1Z - T22;
Chris@42 603 R1[WS(rs, 8)] = T1Z + T22;
Chris@42 604 }
Chris@42 605 }
Chris@42 606 }
Chris@42 607 }
Chris@42 608
/*
 * Descriptor passed to X(kr2c_register) for the non-FMA kernel: 25 and
 * "r2cb_25" match the generator's -n / -name arguments, and {100, 46, 52, ...}
 * repeats the add / multiply / fused-multiply-add counts quoted in this
 * variant's operation-count comment.
 * NOTE(review): the meaning of the fourth count (0) and of GENUS is defined
 * outside this file (presumably r2cb.h) -- confirm there.
 */
Chris@42 609 static const kr2c_desc desc = { 25, "r2cb_25", {100, 46, 52, 0}, &GENUS };
Chris@42 610
/*
 * Registration entry point (non-FMA build): hands the r2cb_25 kernel and its
 * descriptor to planner p through X(kr2c_register).
 * NOTE(review): X(...) is a macro defined elsewhere, presumably adding the
 * library's symbol prefix -- confirm in the FFTW headers.
 */
Chris@42 611 void X(codelet_r2cb_25) (planner *p) {
Chris@42 612 X(kr2c_register) (p, r2cb_25, &desc);
Chris@42 613 }
Chris@42 614
Chris@42 615 #endif /* HAVE_FMA */