annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cbIII_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:51:09 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cbIII_25 -dft-III -include r2cbIII.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 152 FP additions, 120 FP multiplications,
Chris@42 32 * (or, 32 additions, 0 multiplications, 120 fused multiply/add),
Chris@42 33 * 115 stack variables, 44 constants, and 50 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cbIII.h"
Chris@42 36
/*
 * r2cbIII_25 (HAVE_FMA variant): machine-generated codelet; the
 * "Generated by" comment above records the genfft command line
 * (gen_r2cb.native -fma ... -sign 1 -n 25 -dft-III).
 *
 * The loop body runs v times. Each pass reads 13 values from Cr
 * (Cr[0] and Cr[WS(csr, k)] for k = 1..12) and 12 values from Ci
 * (Ci[0] and Ci[WS(csi, k)] for k = 1..11), then writes 13 values to R0
 * (R0[0] and R0[WS(rs, k)] for k = 1..12) and 12 values to R1
 * (R1[0] and R1[WS(rs, k)] for k = 1..11). That is 25 loads and 25
 * stores, matching the "50 memory accesses" in the generator summary.
 *
 * Between passes R0 and R1 advance by ovs, and Cr and Ci advance by ivs.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file (presumably they come
 * from codelet-rdft.h / r2cbIII.h); confirm their exact semantics there.
 */
Chris@42 37 static void r2cbIII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/*
 * Constant table. Each KP<digits> name spells out the leading decimal
 * digits of its value; an underscore separates the integer part from the
 * fraction (e.g. KP1_752613360 is 1.752613360...).
 */
Chris@42 39 DK(KP979740652, +0.979740652857618686258237536568998933733477632);
Chris@42 40 DK(KP438153340, +0.438153340021931793654057951961031291699532119);
Chris@42 41 DK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@42 42 DK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@42 43 DK(KP1_721083328, +1.721083328735889354196523361841037632825608373);
Chris@42 44 DK(KP1_606007150, +1.606007150877320829666881187140752009270929701);
Chris@42 45 DK(KP1_011627398, +1.011627398597394192215998921771049272931807941);
Chris@42 46 DK(KP641441904, +0.641441904830606407298806329068862424939687989);
Chris@42 47 DK(KP595480289, +0.595480289600000014706716770488118292997907308);
Chris@42 48 DK(KP452413526, +0.452413526233009763856834323966348796985206956);
Chris@42 49 DK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@42 50 DK(KP933137358, +0.933137358350283770603023973254446451924190884);
Chris@42 51 DK(KP1_666834356, +1.666834356657377354817925100486477686277992119);
Chris@42 52 DK(KP1_842354653, +1.842354653930286640500894870830132058718564461);
Chris@42 53 DK(KP1_082908895, +1.082908895072625554092571180165639018104066379);
Chris@42 54 DK(KP576710603, +0.576710603632765877371579268136471017090111488);
Chris@42 55 DK(KP662318342, +0.662318342759882818626911127577439236802190210);
Chris@42 56 DK(KP484291580, +0.484291580564315559745084187732367906918006201);
Chris@42 57 DK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@42 58 DK(KP1_898359647, +1.898359647016882523151110931686726543423167685);
Chris@42 59 DK(KP1_386580726, +1.386580726567734802700860150804827247498955921);
Chris@42 60 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 61 DK(KP1_115827804, +1.115827804063668528375399296931134075984874304);
Chris@42 62 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@42 63 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@42 64 DK(KP499013364, +0.499013364214135780976168403431725276668452610);
Chris@42 65 DK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@42 66 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 67 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 68 DK(KP730409924, +0.730409924561256563751459444999838399157094302);
Chris@42 69 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@42 70 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@42 71 DK(KP451418159, +0.451418159099103183892477933432151804893354132);
Chris@42 72 DK(KP846146756, +0.846146756728608505452954290121135880883743802);
Chris@42 73 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@42 74 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@42 75 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@42 76 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 77 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 78 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 79 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@42 80 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 81 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 82 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 83 {
Chris@42 84 INT i;
/* Runs v times; the increment clause steps R0/R1 by ovs and Cr/Ci by ivs. */
Chris@42 85 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) {
/* These temporaries are assigned inside the nested scopes and consumed by the final group of stores at the bottom of the loop body. */
Chris@42 86 E T1P, T2c, T2a, T24, T26, T25, T27, T2b;
Chris@42 87 {
Chris@42 88 E T1O, TS, T5, T1N, TP, Te, TA, T2i, T1V, T17, T1B, T2h, T1S, T10, T1C;
Chris@42 89 E T1a, T19, Tn, T1h, T1l, T1Y, T1e, T21, TJ, T1g;
Chris@42 90 {
Chris@42 91 E T1, T2, T3, TQ, TR;
/* Loads: Ci[7], Ci[2], Cr[12], Cr[7], Cr[2]. */
Chris@42 92 TQ = Ci[WS(csi, 7)];
Chris@42 93 TR = Ci[WS(csi, 2)];
Chris@42 94 T1 = Cr[WS(csr, 12)];
Chris@42 95 T2 = Cr[WS(csr, 7)];
Chris@42 96 T3 = Cr[WS(csr, 2)];
Chris@42 97 T1O = FNMS(KP618033988, TQ, TR);
Chris@42 98 TS = FMA(KP618033988, TR, TQ);
Chris@42 99 {
Chris@42 100 E TV, TU, T1U, T16, T12, T1R, TZ, T11;
Chris@42 101 {
Chris@42 102 E T6, Tz, T14, T15, TX, Tu, Td, Tx, TY, T4, TO, Ty;
/* Loads in this scope: Cr[11], Ci[11], Cr[6], Cr[8], Cr[1], Cr[3], Ci[8], Ci[6], Ci[3], Ci[1]. */
Chris@42 103 T6 = Cr[WS(csr, 11)];
Chris@42 104 T4 = T2 + T3;
Chris@42 105 TO = T3 - T2;
Chris@42 106 Tz = Ci[WS(csi, 11)];
Chris@42 107 {
Chris@42 108 E Ta, T9, Tb, T7, T8, TN;
Chris@42 109 T7 = Cr[WS(csr, 6)];
Chris@42 110 T8 = Cr[WS(csr, 8)];
Chris@42 111 TN = FNMS(KP500000000, T4, T1);
Chris@42 112 T5 = FMA(KP2_000000000, T4, T1);
Chris@42 113 Ta = Cr[WS(csr, 1)];
Chris@42 114 T14 = T8 - T7;
Chris@42 115 T9 = T7 + T8;
Chris@42 116 T1N = FMA(KP1_118033988, TO, TN);
Chris@42 117 TP = FNMS(KP1_118033988, TO, TN);
Chris@42 118 Tb = Cr[WS(csr, 3)];
Chris@42 119 {
Chris@42 120 E Tv, Tw, Ts, Tt, Tc;
Chris@42 121 Ts = Ci[WS(csi, 8)];
Chris@42 122 Tt = Ci[WS(csi, 6)];
Chris@42 123 T15 = Tb - Ta;
Chris@42 124 Tc = Ta + Tb;
Chris@42 125 Tv = Ci[WS(csi, 3)];
Chris@42 126 TX = Tt + Ts;
Chris@42 127 Tu = Ts - Tt;
Chris@42 128 Tw = Ci[WS(csi, 1)];
Chris@42 129 Td = T9 + Tc;
Chris@42 130 TV = Tc - T9;
Chris@42 131 Tx = Tv - Tw;
Chris@42 132 TY = Tw + Tv;
Chris@42 133 }
Chris@42 134 }
Chris@42 135 Te = T6 + Td;
Chris@42 136 TU = FMS(KP250000000, Td, T6);
Chris@42 137 T1U = FNMS(KP618033988, T14, T15);
Chris@42 138 T16 = FMA(KP618033988, T15, T14);
Chris@42 139 T12 = Tx - Tu;
Chris@42 140 Ty = Tu + Tx;
Chris@42 141 T1R = FNMS(KP618033988, TX, TY);
Chris@42 142 TZ = FMA(KP618033988, TY, TX);
Chris@42 143 TA = Ty - Tz;
Chris@42 144 T11 = FMA(KP250000000, Ty, Tz);
Chris@42 145 }
Chris@42 146 {
Chris@42 147 E Tf, TI, T1j, T1k, Tm, T1c, TD, TG, T1d, TH;
/* Loads in this scope: Cr[10], Ci[10], Cr[5], Cr[9], Cr[0], Cr[4], Ci[9], Ci[5], Ci[4], Ci[0]. */
Chris@42 148 Tf = Cr[WS(csr, 10)];
Chris@42 149 TI = Ci[WS(csi, 10)];
Chris@42 150 {
Chris@42 151 E T13, T1T, TW, T1Q;
Chris@42 152 T13 = FMA(KP559016994, T12, T11);
Chris@42 153 T1T = FNMS(KP559016994, T12, T11);
Chris@42 154 TW = FMA(KP559016994, TV, TU);
Chris@42 155 T1Q = FNMS(KP559016994, TV, TU);
Chris@42 156 T2i = FMA(KP951056516, T1U, T1T);
Chris@42 157 T1V = FNMS(KP951056516, T1U, T1T);
Chris@42 158 T17 = FMA(KP951056516, T16, T13);
Chris@42 159 T1B = FNMS(KP951056516, T16, T13);
Chris@42 160 T2h = FNMS(KP951056516, T1R, T1Q);
Chris@42 161 T1S = FMA(KP951056516, T1R, T1Q);
Chris@42 162 T10 = FNMS(KP951056516, TZ, TW);
Chris@42 163 T1C = FMA(KP951056516, TZ, TW);
Chris@42 164 {
Chris@42 165 E Tg, Th, Tj, Tk;
Chris@42 166 Tg = Cr[WS(csr, 5)];
Chris@42 167 Th = Cr[WS(csr, 9)];
Chris@42 168 Tj = Cr[0];
Chris@42 169 Tk = Cr[WS(csr, 4)];
Chris@42 170 {
Chris@42 171 E TB, Ti, Tl, TC, TE, TF;
Chris@42 172 TB = Ci[WS(csi, 9)];
Chris@42 173 T1j = Tg - Th;
Chris@42 174 Ti = Tg + Th;
Chris@42 175 T1k = Tk - Tj;
Chris@42 176 Tl = Tj + Tk;
Chris@42 177 TC = Ci[WS(csi, 5)];
Chris@42 178 TE = Ci[WS(csi, 4)];
Chris@42 179 TF = Ci[0];
Chris@42 180 Tm = Ti + Tl;
Chris@42 181 T1a = Ti - Tl;
Chris@42 182 T1c = TC + TB;
Chris@42 183 TD = TB - TC;
Chris@42 184 TG = TE - TF;
Chris@42 185 T1d = TF + TE;
Chris@42 186 }
Chris@42 187 }
Chris@42 188 }
Chris@42 189 T19 = FMS(KP250000000, Tm, Tf);
Chris@42 190 Tn = Tf + Tm;
Chris@42 191 T1h = TD - TG;
Chris@42 192 TH = TD + TG;
Chris@42 193 T1l = FNMS(KP618033988, T1k, T1j);
Chris@42 194 T1Y = FMA(KP618033988, T1j, T1k);
Chris@42 195 T1e = FMA(KP618033988, T1d, T1c);
Chris@42 196 T21 = FNMS(KP618033988, T1c, T1d);
Chris@42 197 TJ = TH - TI;
Chris@42 198 T1g = FMA(KP250000000, TH, TI);
Chris@42 199 }
Chris@42 200 }
Chris@42 201 }
/* All 25 inputs have been loaded by this point; the rest of the loop body combines temporaries and stores the outputs. */
Chris@42 202 {
Chris@42 203 E T1Z, T1m, T1y, T22, T1f, T1z, T2j, T2g, T2d, T2q, T2s;
Chris@42 204 {
Chris@42 205 E Tq, To, T2e, T2f;
Chris@42 206 Tq = Tn - Te;
Chris@42 207 To = Te + Tn;
Chris@42 208 {
Chris@42 209 E T1i, T1X, T1b, T20;
Chris@42 210 T1i = FNMS(KP559016994, T1h, T1g);
Chris@42 211 T1X = FMA(KP559016994, T1h, T1g);
Chris@42 212 T1b = FNMS(KP559016994, T1a, T19);
Chris@42 213 T20 = FMA(KP559016994, T1a, T19);
Chris@42 214 T2e = FMA(KP951056516, T1Y, T1X);
Chris@42 215 T1Z = FNMS(KP951056516, T1Y, T1X);
Chris@42 216 T1m = FNMS(KP951056516, T1l, T1i);
Chris@42 217 T1y = FMA(KP951056516, T1l, T1i);
Chris@42 218 T2f = FNMS(KP951056516, T21, T20);
Chris@42 219 T22 = FMA(KP951056516, T21, T20);
Chris@42 220 T1f = FNMS(KP951056516, T1e, T1b);
Chris@42 221 T1z = FMA(KP951056516, T1e, T1b);
Chris@42 222 }
Chris@42 223 {
Chris@42 224 E T2o, TK, TM, T2p, Tr, TL, Tp;
/* Stores in this scope: R0[0], R0[10], R1[2], R1[7], R0[5]. */
Chris@42 225 T2o = FMA(KP939062505, T2h, T2i);
Chris@42 226 T2j = FNMS(KP939062505, T2i, T2h);
Chris@42 227 R0[0] = FMA(KP2_000000000, To, T5);
Chris@42 228 Tp = FNMS(KP500000000, To, T5);
Chris@42 229 TK = FMA(KP618033988, TJ, TA);
Chris@42 230 TM = FNMS(KP618033988, TA, TJ);
Chris@42 231 T2g = FNMS(KP062914667, T2f, T2e);
Chris@42 232 T2p = FMA(KP062914667, T2e, T2f);
Chris@42 233 Tr = FNMS(KP1_118033988, Tq, Tp);
Chris@42 234 TL = FMA(KP1_118033988, Tq, Tp);
Chris@42 235 T2d = FMA(KP1_902113032, T1O, T1N);
Chris@42 236 T1P = FNMS(KP1_902113032, T1O, T1N);
Chris@42 237 T2q = FMA(KP846146756, T2p, T2o);
Chris@42 238 T2s = FNMS(KP451418159, T2o, T2p);
Chris@42 239 R0[WS(rs, 10)] = FMA(KP1_902113032, TK, Tr);
Chris@42 240 R1[WS(rs, 2)] = FMS(KP1_902113032, TK, Tr);
Chris@42 241 R1[WS(rs, 7)] = FMS(KP1_902113032, TM, TL);
Chris@42 242 R0[WS(rs, 5)] = FMA(KP1_902113032, TM, TL);
Chris@42 243 }
Chris@42 244 }
Chris@42 245 {
Chris@42 246 E T18, T1n, T1x, TT, T2m, T1w, T1u, T2l, T1s, T1t, T2k;
Chris@42 247 T18 = FNMS(KP256756360, T17, T10);
Chris@42 248 T1s = FMA(KP256756360, T10, T17);
Chris@42 249 T1t = FMA(KP549754652, T1f, T1m);
Chris@42 250 T1n = FNMS(KP549754652, T1m, T1f);
Chris@42 251 T1x = FNMS(KP1_902113032, TS, TP);
Chris@42 252 TT = FMA(KP1_902113032, TS, TP);
Chris@42 253 T2m = FMA(KP730409924, T2j, T2g);
Chris@42 254 T2k = FNMS(KP730409924, T2j, T2g);
Chris@42 255 T1w = FNMS(KP683113946, T1s, T1t);
Chris@42 256 T1u = FMA(KP559154169, T1t, T1s);
Chris@42 257 R1[WS(rs, 1)] = -(FMA(KP1_996053456, T2k, T2d));
Chris@42 258 T2l = FNMS(KP499013364, T2k, T2d);
Chris@42 259 {
Chris@42 260 E T1K, T1M, T1G, T1E;
Chris@42 261 {
Chris@42 262 E T1D, T1A, T1q, T1p, T1v, T1r;
Chris@42 263 {
Chris@42 264 E T1I, T1J, T2n, T2r, T1o;
Chris@42 265 T1I = FMA(KP634619297, T1B, T1C);
Chris@42 266 T1D = FNMS(KP634619297, T1C, T1B);
Chris@42 267 T1A = FMA(KP470564281, T1z, T1y);
Chris@42 268 T1J = FNMS(KP470564281, T1y, T1z);
Chris@42 269 T2n = FNMS(KP1_115827804, T2m, T2l);
Chris@42 270 T2r = FMA(KP1_115827804, T2m, T2l);
Chris@42 271 T1q = FNMS(KP904730450, T1n, T18);
Chris@42 272 T1o = FMA(KP904730450, T1n, T18);
Chris@42 273 R1[WS(rs, 11)] = FMS(KP1_386580726, T2q, T2n);
Chris@42 274 R0[WS(rs, 4)] = FMA(KP1_386580726, T2q, T2n);
Chris@42 275 R0[WS(rs, 9)] = FMA(KP1_898359647, T2s, T2r);
Chris@42 276 R1[WS(rs, 6)] = FMS(KP1_898359647, T2s, T2r);
Chris@42 277 R1[0] = FMS(KP1_937166322, T1o, TT);
Chris@42 278 T1p = FMA(KP484291580, T1o, TT);
Chris@42 279 T1K = FMA(KP662318342, T1J, T1I);
Chris@42 280 T1M = FNMS(KP576710603, T1I, T1J);
Chris@42 281 }
Chris@42 282 T1v = FMA(KP1_082908895, T1q, T1p);
Chris@42 283 T1r = FNMS(KP1_082908895, T1q, T1p);
Chris@42 284 R1[WS(rs, 10)] = FMS(KP1_842354653, T1u, T1r);
Chris@42 285 R0[WS(rs, 3)] = FMA(KP1_842354653, T1u, T1r);
Chris@42 286 R0[WS(rs, 8)] = FMA(KP1_666834356, T1w, T1v);
Chris@42 287 R1[WS(rs, 5)] = FMS(KP1_666834356, T1w, T1v);
Chris@42 288 T1G = FNMS(KP933137358, T1D, T1A);
Chris@42 289 T1E = FMA(KP933137358, T1D, T1A);
Chris@42 290 }
Chris@42 291 {
Chris@42 292 E T23, T28, T29, T1W, T1F, T1H, T1L;
Chris@42 293 T23 = FNMS(KP634619297, T22, T1Z);
Chris@42 294 T28 = FMA(KP634619297, T1Z, T22);
Chris@42 295 T29 = FMA(KP549754652, T1S, T1V);
Chris@42 296 T1W = FNMS(KP549754652, T1V, T1S);
Chris@42 297 R0[WS(rs, 2)] = FMA(KP1_809654104, T1E, T1x);
Chris@42 298 T1F = FNMS(KP452413526, T1E, T1x);
Chris@42 299 T2c = FMA(KP595480289, T28, T29);
Chris@42 300 T2a = FNMS(KP641441904, T29, T28);
Chris@42 301 T1H = FNMS(KP1_011627398, T1G, T1F);
Chris@42 302 T1L = FMA(KP1_011627398, T1G, T1F);
Chris@42 303 R0[WS(rs, 12)] = FNMS(KP1_606007150, T1K, T1H);
Chris@42 304 R1[WS(rs, 4)] = -(FMA(KP1_606007150, T1K, T1H));
Chris@42 305 R1[WS(rs, 9)] = -(FMA(KP1_721083328, T1M, T1L));
Chris@42 306 R0[WS(rs, 7)] = FNMS(KP1_721083328, T1M, T1L);
Chris@42 307 T24 = FNMS(KP963507348, T23, T1W);
Chris@42 308 T26 = FMA(KP963507348, T23, T1W);
Chris@42 309 }
Chris@42 310 }
Chris@42 311 }
Chris@42 312 }
Chris@42 313 }
/* Final stores (R0[1], R1[8], R0[6], R1[3], R0[11]), built from the temporaries declared at the top of the loop body. */
Chris@42 314 R0[WS(rs, 1)] = FNMS(KP1_752613360, T24, T1P);
Chris@42 315 T25 = FMA(KP438153340, T24, T1P);
Chris@42 316 T27 = FMA(KP979740652, T26, T25);
Chris@42 317 T2b = FNMS(KP979740652, T26, T25);
Chris@42 318 R1[WS(rs, 8)] = -(FMA(KP1_606007150, T2a, T27));
Chris@42 319 R0[WS(rs, 6)] = FNMS(KP1_606007150, T2a, T27);
Chris@42 320 R1[WS(rs, 3)] = -(FMA(KP1_666834356, T2c, T2b));
Chris@42 321 R0[WS(rs, 11)] = FNMS(KP1_666834356, T2c, T2b);
Chris@42 322 }
Chris@42 323 }
Chris@42 324 }
Chris@42 325
/*
 * Descriptor handed to X(kr2c_register) together with the HAVE_FMA
 * r2cbIII_25 routine. 25 matches the -n 25 on the generator command line,
 * and {32, 0, 120, ...} matches the "32 additions, 0 multiplications,
 * 120 fused multiply/add" summary above.
 * NOTE(review): the kr2c_desc field layout, the meaning of the trailing 0
 * and GENUS are defined outside this file; confirm there.
 */
Chris@42 326 static const kr2c_desc desc = { 25, "r2cbIII_25", {32, 0, 120, 0}, &GENUS };
Chris@42 327
/*
 * Registration entry point (HAVE_FMA build): passes the planner p, the
 * r2cbIII_25 routine and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * name-prefixing macro); confirm in the project headers.
 */
Chris@42 328 void X(codelet_r2cbIII_25) (planner *p) {
Chris@42 329 X(kr2c_register) (p, r2cbIII_25, &desc);
Chris@42 330 }
Chris@42 331
Chris@42 332 #else /* HAVE_FMA */
Chris@42 333
Chris@42 334 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cbIII_25 -dft-III -include r2cbIII.h */
Chris@42 335
Chris@42 336 /*
Chris@42 337 * This function contains 152 FP additions, 98 FP multiplications,
Chris@42 338 * (or, 100 additions, 46 multiplications, 52 fused multiply/add),
Chris@42 339 * 65 stack variables, 21 constants, and 50 memory accesses
Chris@42 340 */
Chris@42 341 #include "r2cbIII.h"
Chris@42 342
/*
 * r2cbIII_25 (variant compiled when HAVE_FMA is not defined):
 * machine-generated codelet; the "Generated by" comment above records
 * the genfft command line (gen_r2cb.native ... -sign 1 -n 25 -dft-III).
 *
 * The loop body runs v times. Each pass reads 13 values from Cr
 * (Cr[0] and Cr[WS(csr, k)] for k = 1..12) and 12 values from Ci
 * (Ci[0] and Ci[WS(csi, k)] for k = 1..11), then writes 13 values to R0
 * (R0[0] and R0[WS(rs, k)] for k = 1..12) and 12 values to R1
 * (R1[0] and R1[WS(rs, k)] for k = 1..11). That is 25 loads and 25
 * stores, matching the "50 memory accesses" in the generator summary.
 *
 * Between passes R0 and R1 advance by ovs, and Cr and Ci advance by ivs.
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file (presumably they come
 * from codelet-rdft.h / r2cbIII.h); confirm their exact semantics there.
 */
Chris@42 343 static void r2cbIII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 344 {
/*
 * Constant table. Each KP<digits> name spells out the leading decimal
 * digits of its value; an underscore separates the integer part from the
 * fraction (e.g. KP1_118033988 is 1.118033988...).
 */
Chris@42 345 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 346 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 347 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 348 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 349 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 350 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 351 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 352 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 353 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 354 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 355 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 356 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 357 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 358 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 359 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 360 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 361 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@42 362 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 363 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@42 364 DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
Chris@42 365 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@42 366 {
Chris@42 367 INT i;
/* Runs v times; the increment clause steps R0/R1 by ovs and Cr/Ci by ivs. */
Chris@42 368 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) {
/* Temporaries shared between the input-combining scopes and the five output scopes further down. */
Chris@42 369 E TS, T1O, T5, TP, T1N, TI, TH, Te, T17, T2h, T1y, T1V, T10, T2g, T1x;
Chris@42 370 E T1S, Tz, Ty, Tn, T1m, T2e, T1B, T22, T1f, T2d, T1A, T1Z, TQ, TR;
/* Loads Ci[2] and Ci[7]. */
Chris@42 371 TQ = Ci[WS(csi, 2)];
Chris@42 372 TR = Ci[WS(csi, 7)];
Chris@42 373 TS = FNMS(KP1_175570504, TR, KP1_902113032 * TQ);
Chris@42 374 T1O = FMA(KP1_902113032, TR, KP1_175570504 * TQ);
/* Loads Cr[12], Cr[7], Cr[2]. */
Chris@42 375 {
Chris@42 376 E T1, T4, TN, T2, T3, TO;
Chris@42 377 T1 = Cr[WS(csr, 12)];
Chris@42 378 T2 = Cr[WS(csr, 7)];
Chris@42 379 T3 = Cr[WS(csr, 2)];
Chris@42 380 T4 = T2 + T3;
Chris@42 381 TN = KP1_118033988 * (T3 - T2);
Chris@42 382 T5 = FMA(KP2_000000000, T4, T1);
Chris@42 383 TO = FMS(KP500000000, T4, T1);
Chris@42 384 TP = TN - TO;
Chris@42 385 T1N = TO + TN;
Chris@42 386 }
/* Loads Cr[11], Ci[11], Cr[6], Cr[8], Cr[1], Cr[3], Ci[6], Ci[8], Ci[1], Ci[3]. */
Chris@42 387 {
Chris@42 388 E T6, Td, T15, TU, T14, T11, TX, TY;
Chris@42 389 T6 = Cr[WS(csr, 11)];
Chris@42 390 TI = Ci[WS(csi, 11)];
Chris@42 391 {
Chris@42 392 E T7, T8, T9, Ta, Tb, Tc;
Chris@42 393 T7 = Cr[WS(csr, 6)];
Chris@42 394 T8 = Cr[WS(csr, 8)];
Chris@42 395 T9 = T7 + T8;
Chris@42 396 Ta = Cr[WS(csr, 1)];
Chris@42 397 Tb = Cr[WS(csr, 3)];
Chris@42 398 Tc = Ta + Tb;
Chris@42 399 Td = T9 + Tc;
Chris@42 400 T15 = Ta - Tb;
Chris@42 401 TU = KP559016994 * (Tc - T9);
Chris@42 402 T14 = T8 - T7;
Chris@42 403 }
Chris@42 404 {
Chris@42 405 E TB, TC, TD, TE, TF, TG;
Chris@42 406 TB = Ci[WS(csi, 6)];
Chris@42 407 TC = Ci[WS(csi, 8)];
Chris@42 408 TD = TB - TC;
Chris@42 409 TE = Ci[WS(csi, 1)];
Chris@42 410 TF = Ci[WS(csi, 3)];
Chris@42 411 TG = TE - TF;
Chris@42 412 TH = TD + TG;
Chris@42 413 T11 = KP559016994 * (TD - TG);
Chris@42 414 TX = TB + TC;
Chris@42 415 TY = TE + TF;
Chris@42 416 }
Chris@42 417 Te = T6 + Td;
Chris@42 418 {
Chris@42 419 E T16, T1T, T13, T1U, T12;
Chris@42 420 T16 = FMA(KP587785252, T14, KP951056516 * T15);
Chris@42 421 T1T = FNMS(KP587785252, T15, KP951056516 * T14);
Chris@42 422 T12 = FNMS(KP250000000, TH, TI);
Chris@42 423 T13 = T11 - T12;
Chris@42 424 T1U = T11 + T12;
Chris@42 425 T17 = T13 - T16;
Chris@42 426 T2h = T1T - T1U;
Chris@42 427 T1y = T16 + T13;
Chris@42 428 T1V = T1T + T1U;
Chris@42 429 }
Chris@42 430 {
Chris@42 431 E TZ, T1R, TW, T1Q, TV;
Chris@42 432 TZ = FNMS(KP951056516, TY, KP587785252 * TX);
Chris@42 433 T1R = FMA(KP951056516, TX, KP587785252 * TY);
Chris@42 434 TV = FMS(KP250000000, Td, T6);
Chris@42 435 TW = TU - TV;
Chris@42 436 T1Q = TV + TU;
Chris@42 437 T10 = TW + TZ;
Chris@42 438 T2g = T1Q + T1R;
Chris@42 439 T1x = TZ - TW;
Chris@42 440 T1S = T1Q - T1R;
Chris@42 441 }
Chris@42 442 }
/* Loads Cr[10], Ci[10], Cr[5], Cr[9], Cr[0], Cr[4], Ci[4], Ci[0], Ci[5], Ci[9]. */
Chris@42 443 {
Chris@42 444 E Tf, Tm, T1k, T19, T1j, T1g, T1c, T1d;
Chris@42 445 Tf = Cr[WS(csr, 10)];
Chris@42 446 Tz = Ci[WS(csi, 10)];
Chris@42 447 {
Chris@42 448 E Tg, Th, Ti, Tj, Tk, Tl;
Chris@42 449 Tg = Cr[WS(csr, 5)];
Chris@42 450 Th = Cr[WS(csr, 9)];
Chris@42 451 Ti = Tg + Th;
Chris@42 452 Tj = Cr[0];
Chris@42 453 Tk = Cr[WS(csr, 4)];
Chris@42 454 Tl = Tj + Tk;
Chris@42 455 Tm = Ti + Tl;
Chris@42 456 T1k = Tj - Tk;
Chris@42 457 T19 = KP559016994 * (Tl - Ti);
Chris@42 458 T1j = Th - Tg;
Chris@42 459 }
Chris@42 460 {
Chris@42 461 E Ts, Tt, Tu, Tv, Tw, Tx;
Chris@42 462 Ts = Ci[WS(csi, 4)];
Chris@42 463 Tt = Ci[0];
Chris@42 464 Tu = Ts - Tt;
Chris@42 465 Tv = Ci[WS(csi, 5)];
Chris@42 466 Tw = Ci[WS(csi, 9)];
Chris@42 467 Tx = Tv - Tw;
Chris@42 468 Ty = Tu - Tx;
Chris@42 469 T1g = KP559016994 * (Tx + Tu);
Chris@42 470 T1c = Tv + Tw;
Chris@42 471 T1d = Tt + Ts;
Chris@42 472 }
Chris@42 473 Tn = Tf + Tm;
Chris@42 474 {
Chris@42 475 E T1l, T20, T1i, T21, T1h;
Chris@42 476 T1l = FMA(KP587785252, T1j, KP951056516 * T1k);
Chris@42 477 T20 = FNMS(KP587785252, T1k, KP951056516 * T1j);
Chris@42 478 T1h = FMA(KP250000000, Ty, Tz);
Chris@42 479 T1i = T1g - T1h;
Chris@42 480 T21 = T1g + T1h;
Chris@42 481 T1m = T1i - T1l;
Chris@42 482 T2e = T21 - T20;
Chris@42 483 T1B = T1l + T1i;
Chris@42 484 T22 = T20 + T21;
Chris@42 485 }
Chris@42 486 {
Chris@42 487 E T1e, T1Y, T1b, T1X, T1a;
Chris@42 488 T1e = FNMS(KP951056516, T1d, KP587785252 * T1c);
Chris@42 489 T1Y = FMA(KP951056516, T1c, KP587785252 * T1d);
Chris@42 490 T1a = FMS(KP250000000, Tm, Tf);
Chris@42 491 T1b = T19 - T1a;
Chris@42 492 T1X = T1a + T19;
Chris@42 493 T1f = T1b + T1e;
Chris@42 494 T2d = T1X + T1Y;
Chris@42 495 T1A = T1e - T1b;
Chris@42 496 T1Z = T1X - T1Y;
Chris@42 497 }
Chris@42 498 }
/* Stores R0[0], R0[5], R1[7], R1[2], R0[10]. */
Chris@42 499 {
Chris@42 500 E Tq, To, Tp, TK, TM, TA, TJ, TL, Tr;
Chris@42 501 Tq = KP1_118033988 * (Tn - Te);
Chris@42 502 To = Te + Tn;
Chris@42 503 Tp = FMS(KP500000000, To, T5);
Chris@42 504 TA = Ty - Tz;
Chris@42 505 TJ = TH + TI;
Chris@42 506 TK = FNMS(KP1_902113032, TJ, KP1_175570504 * TA);
Chris@42 507 TM = FMA(KP1_175570504, TJ, KP1_902113032 * TA);
Chris@42 508 R0[0] = FMA(KP2_000000000, To, T5);
Chris@42 509 TL = Tq - Tp;
Chris@42 510 R0[WS(rs, 5)] = TL + TM;
Chris@42 511 R1[WS(rs, 7)] = TM - TL;
Chris@42 512 Tr = Tp + Tq;
Chris@42 513 R1[WS(rs, 2)] = Tr + TK;
Chris@42 514 R0[WS(rs, 10)] = TK - Tr;
Chris@42 515 }
/* Stores R0[2], R0[7], R1[9], R1[4], R0[12]. */
Chris@42 516 {
Chris@42 517 E T2q, T2s, T2k, T2j, T2l, T2m, T2r, T2n;
Chris@42 518 {
Chris@42 519 E T2o, T2p, T2f, T2i;
Chris@42 520 T2o = FNMS(KP904827052, T2d, KP425779291 * T2e);
Chris@42 521 T2p = FNMS(KP535826794, T2h, KP844327925 * T2g);
Chris@42 522 T2q = FNMS(KP1_902113032, T2p, KP1_175570504 * T2o);
Chris@42 523 T2s = FMA(KP1_175570504, T2p, KP1_902113032 * T2o);
Chris@42 524 T2k = T1N + T1O;
Chris@42 525 T2f = FMA(KP425779291, T2d, KP904827052 * T2e);
Chris@42 526 T2i = FMA(KP535826794, T2g, KP844327925 * T2h);
Chris@42 527 T2j = T2f - T2i;
Chris@42 528 T2l = FMA(KP500000000, T2j, T2k);
Chris@42 529 T2m = KP1_118033988 * (T2i + T2f);
Chris@42 530 }
Chris@42 531 R0[WS(rs, 2)] = FMS(KP2_000000000, T2j, T2k);
Chris@42 532 T2r = T2m - T2l;
Chris@42 533 R0[WS(rs, 7)] = T2r + T2s;
Chris@42 534 R1[WS(rs, 9)] = T2s - T2r;
Chris@42 535 T2n = T2l + T2m;
Chris@42 536 R1[WS(rs, 4)] = T2n + T2q;
Chris@42 537 R0[WS(rs, 12)] = T2q - T2n;
Chris@42 538 }
/* Stores R0[1], R0[6], R1[8], R1[3], R0[11]. */
Chris@42 539 {
Chris@42 540 E T1u, T1w, TT, T1o, T1p, T1q, T1v, T1r;
Chris@42 541 {
Chris@42 542 E T1s, T1t, T18, T1n;
Chris@42 543 T1s = FMA(KP481753674, T10, KP876306680 * T17);
Chris@42 544 T1t = FMA(KP844327925, T1f, KP535826794 * T1m);
Chris@42 545 T1u = FMA(KP1_902113032, T1s, KP1_175570504 * T1t);
Chris@42 546 T1w = FNMS(KP1_175570504, T1s, KP1_902113032 * T1t);
Chris@42 547 TT = TP - TS;
Chris@42 548 T18 = FNMS(KP481753674, T17, KP876306680 * T10);
Chris@42 549 T1n = FNMS(KP844327925, T1m, KP535826794 * T1f);
Chris@42 550 T1o = T18 + T1n;
Chris@42 551 T1p = FMS(KP500000000, T1o, TT);
Chris@42 552 T1q = KP1_118033988 * (T1n - T18);
Chris@42 553 }
Chris@42 554 R0[WS(rs, 1)] = FMA(KP2_000000000, T1o, TT);
Chris@42 555 T1v = T1q - T1p;
Chris@42 556 R0[WS(rs, 6)] = T1v + T1w;
Chris@42 557 R1[WS(rs, 8)] = T1w - T1v;
Chris@42 558 T1r = T1p + T1q;
Chris@42 559 R1[WS(rs, 3)] = T1r + T1u;
Chris@42 560 R0[WS(rs, 11)] = T1u - T1r;
Chris@42 561 }
/* Stores R1[1], R0[9], R1[6], R1[11], R0[4]. */
Chris@42 562 {
Chris@42 563 E T1H, T1L, T1E, T1D, T1I, T1J, T1M, T1K;
Chris@42 564 {
Chris@42 565 E T1F, T1G, T1z, T1C;
Chris@42 566 T1F = FNMS(KP062790519, T1B, KP998026728 * T1A);
Chris@42 567 T1G = FNMS(KP684547105, T1x, KP728968627 * T1y);
Chris@42 568 T1H = FNMS(KP1_902113032, T1G, KP1_175570504 * T1F);
Chris@42 569 T1L = FMA(KP1_175570504, T1G, KP1_902113032 * T1F);
Chris@42 570 T1E = TP + TS;
Chris@42 571 T1z = FMA(KP728968627, T1x, KP684547105 * T1y);
Chris@42 572 T1C = FMA(KP062790519, T1A, KP998026728 * T1B);
Chris@42 573 T1D = T1z + T1C;
Chris@42 574 T1I = FMA(KP500000000, T1D, T1E);
Chris@42 575 T1J = KP1_118033988 * (T1C - T1z);
Chris@42 576 }
Chris@42 577 R1[WS(rs, 1)] = FMS(KP2_000000000, T1D, T1E);
Chris@42 578 T1M = T1J - T1I;
Chris@42 579 R0[WS(rs, 9)] = T1L - T1M;
Chris@42 580 R1[WS(rs, 6)] = T1L + T1M;
Chris@42 581 T1K = T1I + T1J;
Chris@42 582 R1[WS(rs, 11)] = T1H - T1K;
Chris@42 583 R0[WS(rs, 4)] = T1H + T1K;
Chris@42 584 }
/* Stores R1[0], R1[5], R0[8], R0[3], R1[10]. */
Chris@42 585 {
Chris@42 586 E T2a, T2c, T1P, T24, T25, T26, T2b, T27;
Chris@42 587 {
Chris@42 588 E T28, T29, T1W, T23;
Chris@42 589 T28 = FMA(KP248689887, T1S, KP968583161 * T1V);
Chris@42 590 T29 = FMA(KP481753674, T1Z, KP876306680 * T22);
Chris@42 591 T2a = FMA(KP1_902113032, T28, KP1_175570504 * T29);
Chris@42 592 T2c = FNMS(KP1_175570504, T28, KP1_902113032 * T29);
Chris@42 593 T1P = T1N - T1O;
Chris@42 594 T1W = FNMS(KP248689887, T1V, KP968583161 * T1S);
Chris@42 595 T23 = FNMS(KP481753674, T22, KP876306680 * T1Z);
Chris@42 596 T24 = T1W + T23;
Chris@42 597 T25 = FMS(KP500000000, T24, T1P);
Chris@42 598 T26 = KP1_118033988 * (T23 - T1W);
Chris@42 599 }
Chris@42 600 R1[0] = FMA(KP2_000000000, T24, T1P);
Chris@42 601 T2b = T26 - T25;
Chris@42 602 R1[WS(rs, 5)] = T2b + T2c;
Chris@42 603 R0[WS(rs, 8)] = T2c - T2b;
Chris@42 604 T27 = T25 + T26;
Chris@42 605 R0[WS(rs, 3)] = T27 + T2a;
Chris@42 606 R1[WS(rs, 10)] = T2a - T27;
Chris@42 607 }
Chris@42 608 }
Chris@42 609 }
Chris@42 610 }
Chris@42 611
/*
 * Descriptor handed to X(kr2c_register) together with the non-FMA
 * r2cbIII_25 routine. 25 matches the -n 25 on the generator command line,
 * and {100, 46, 52, ...} matches the "100 additions, 46 multiplications,
 * 52 fused multiply/add" summary above.
 * NOTE(review): the kr2c_desc field layout, the meaning of the trailing 0
 * and GENUS are defined outside this file; confirm there.
 */
Chris@42 612 static const kr2c_desc desc = { 25, "r2cbIII_25", {100, 46, 52, 0}, &GENUS };
Chris@42 613
/*
 * Registration entry point (build without HAVE_FMA): passes the planner p,
 * the r2cbIII_25 routine and its descriptor to X(kr2c_register).
 * NOTE(review): X() is a macro defined outside this file (presumably a
 * name-prefixing macro); confirm in the project headers.
 */
Chris@42 614 void X(codelet_r2cbIII_25) (planner *p) {
Chris@42 615 X(kr2c_register) (p, r2cbIII_25, &desc);
Chris@42 616 }
Chris@42 617
Chris@42 618 #endif /* HAVE_FMA */