annotate src/fftw-3.3.8/rdft/scalar/r2cb/r2cb_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:30 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cb_25 -include rdft/scalar/r2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 152 FP additions, 120 FP multiplications,
Chris@82 32 * (or, 32 additions, 0 multiplications, 120 fused multiply/add),
Chris@82 33 * 88 stack variables, 44 constants, and 50 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/r2cb.h"
Chris@82 36
/*
 * r2cb_25 (FMA variant): kernel emitted by genfft (gen_r2cb with -fma,
 * -n 25, -sign 1); compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each loop iteration reads 25 inputs -- Cr[0], Cr[WS(csr, k)] and
 * Ci[WS(csi, k)] for k = 1..12 (Ci[0] is never read) -- and writes 25
 * outputs -- R0[0], R0[WS(rs, k)] for k = 1..12, R1[0] and
 * R1[WS(rs, k)] for k = 1..11.
 *
 * Parameters:
 *   R0, R1 - output arrays, indexed through stride rs
 *   Cr, Ci - input arrays, indexed through strides csr and csi
 *            (presumably the real and imaginary parts of the input;
 *            confirm against the codelet headers)
 *   v      - number of loop iterations to perform
 *   ivs    - offset added to Cr and Ci after each iteration
 *   ovs    - offset added to R0 and R1 after each iteration
 *
 * NOTE(review): R0/R1 look like the even-/odd-indexed real outputs of
 * FFTW's r2c codelet convention; that is not established in this file.
 * The body is machine-generated straight-line code: regenerate it rather
 * than editing by hand.
 */
Chris@82 37 static void r2cb_25(R *R0, R *R1, Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 38 {
/*
 * Constants declared through the DK macro (defined outside this file).
 * Some are numerically recognizable: KP559016994 ~ sqrt(5)/4,
 * KP951056516 ~ sin(2*pi/5), KP618033988 ~ (sqrt(5) - 1)/2.
 */
Chris@82 39 DK(KP979740652, +0.979740652857618686258237536568998933733477632);
Chris@82 40 DK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@82 41 DK(KP438153340, +0.438153340021931793654057951961031291699532119);
Chris@82 42 DK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@82 43 DK(KP641441904, +0.641441904830606407298806329068862424939687989);
Chris@82 44 DK(KP595480289, +0.595480289600000014706716770488118292997907308);
Chris@82 45 DK(KP1_721083328, +1.721083328735889354196523361841037632825608373);
Chris@82 46 DK(KP1_606007150, +1.606007150877320829666881187140752009270929701);
Chris@82 47 DK(KP1_011627398, +1.011627398597394192215998921771049272931807941);
Chris@82 48 DK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@82 49 DK(KP452413526, +0.452413526233009763856834323966348796985206956);
Chris@82 50 DK(KP933137358, +0.933137358350283770603023973254446451924190884);
Chris@82 51 DK(KP662318342, +0.662318342759882818626911127577439236802190210);
Chris@82 52 DK(KP576710603, +0.576710603632765877371579268136471017090111488);
Chris@82 53 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@82 54 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@82 55 DK(KP1_842354653, +1.842354653930286640500894870830132058718564461);
Chris@82 56 DK(KP1_666834356, +1.666834356657377354817925100486477686277992119);
Chris@82 57 DK(KP1_082908895, +1.082908895072625554092571180165639018104066379);
Chris@82 58 DK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@82 59 DK(KP484291580, +0.484291580564315559745084187732367906918006201);
Chris@82 60 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 61 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 62 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 63 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@82 64 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@82 65 DK(KP1_386580726, +1.386580726567734802700860150804827247498955921);
Chris@82 66 DK(KP1_898359647, +1.898359647016882523151110931686726543423167685);
Chris@82 67 DK(KP1_115827804, +1.115827804063668528375399296931134075984874304);
Chris@82 68 DK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@82 69 DK(KP499013364, +0.499013364214135780976168403431725276668452610);
Chris@82 70 DK(KP730409924, +0.730409924561256563751459444999838399157094302);
Chris@82 71 DK(KP451418159, +0.451418159099103183892477933432151804893354132);
Chris@82 72 DK(KP846146756, +0.846146756728608505452954290121135880883743802);
Chris@82 73 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@82 74 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@82 75 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@82 76 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 77 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 78 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 79 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@82 80 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 81 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 82 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 83 {
Chris@82 84 INT i;
/*
 * Runs v times; after each pass the output pointers advance by ovs and
 * the input pointers by ivs. MAKE_VOLATILE_STRIDE is a macro from outside
 * this file (its purpose is not visible here).
 */
Chris@82 85 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) {
Chris@82 86 E Tu, T1G, T5, Tr, T1F, TH, TK, Te, TR, T2a, T1t, T1N, TG, T29, T1u;
Chris@82 87 E T1K, T14, T17, Tn, T1e, T26, T1q, T1R, T13, T27, T1r, T1U, Ts, Tt;
/* Ci inputs at indices 5 and 10, combined into Tu and T1G. */
Chris@82 88 Ts = Ci[WS(csi, 5)];
Chris@82 89 Tt = Ci[WS(csi, 10)];
Chris@82 90 Tu = FMA(KP618033988, Tt, Ts);
Chris@82 91 T1G = FMS(KP618033988, Ts, Tt);
/* Cr inputs at indices 0, 5 and 10, combined into T5, Tr and T1F. */
Chris@82 92 {
Chris@82 93 E T1, T4, Tq, T2, T3, Tp;
Chris@82 94 T1 = Cr[0];
Chris@82 95 T2 = Cr[WS(csr, 5)];
Chris@82 96 T3 = Cr[WS(csr, 10)];
Chris@82 97 T4 = T2 + T3;
Chris@82 98 Tq = T2 - T3;
Chris@82 99 T5 = FMA(KP2_000000000, T4, T1);
Chris@82 100 Tp = FNMS(KP500000000, T4, T1);
Chris@82 101 Tr = FMA(KP1_118033988, Tq, Tp);
Chris@82 102 T1F = FNMS(KP1_118033988, Tq, Tp);
Chris@82 103 }
/* Cr and Ci inputs at indices 1, 4, 6, 9 and 11. */
Chris@82 104 {
Chris@82 105 E T6, Td, TP, Tx, TO, TB, TM, TE;
Chris@82 106 T6 = Cr[WS(csr, 1)];
Chris@82 107 TH = Ci[WS(csi, 1)];
Chris@82 108 {
Chris@82 109 E T7, T8, T9, Ta, Tb, Tc;
Chris@82 110 T7 = Cr[WS(csr, 6)];
Chris@82 111 T8 = Cr[WS(csr, 4)];
Chris@82 112 T9 = T7 + T8;
Chris@82 113 Ta = Cr[WS(csr, 11)];
Chris@82 114 Tb = Cr[WS(csr, 9)];
Chris@82 115 Tc = Ta + Tb;
Chris@82 116 Td = T9 + Tc;
Chris@82 117 TP = Tb - Ta;
Chris@82 118 Tx = T9 - Tc;
Chris@82 119 TO = T7 - T8;
Chris@82 120 }
Chris@82 121 {
Chris@82 122 E Tz, TA, TI, TC, TD, TJ;
Chris@82 123 Tz = Ci[WS(csi, 6)];
Chris@82 124 TA = Ci[WS(csi, 4)];
Chris@82 125 TI = Tz - TA;
Chris@82 126 TC = Ci[WS(csi, 11)];
Chris@82 127 TD = Ci[WS(csi, 9)];
Chris@82 128 TJ = TC - TD;
Chris@82 129 TB = Tz + TA;
Chris@82 130 TM = TI - TJ;
Chris@82 131 TE = TC + TD;
Chris@82 132 TK = TI + TJ;
Chris@82 133 }
Chris@82 134 Te = T6 + Td;
Chris@82 135 {
Chris@82 136 E TQ, T1M, TN, T1L, TL;
Chris@82 137 TQ = FNMS(KP618033988, TP, TO);
Chris@82 138 T1M = FMA(KP618033988, TO, TP);
Chris@82 139 TL = FNMS(KP250000000, TK, TH);
Chris@82 140 TN = FMA(KP559016994, TM, TL);
Chris@82 141 T1L = FNMS(KP559016994, TM, TL);
Chris@82 142 TR = FMA(KP951056516, TQ, TN);
Chris@82 143 T2a = FNMS(KP951056516, T1M, T1L);
Chris@82 144 T1t = FNMS(KP951056516, TQ, TN);
Chris@82 145 T1N = FMA(KP951056516, T1M, T1L);
Chris@82 146 }
Chris@82 147 {
Chris@82 148 E TF, T1J, Ty, T1I, Tw;
Chris@82 149 TF = FMA(KP618033988, TE, TB);
Chris@82 150 T1J = FNMS(KP618033988, TB, TE);
Chris@82 151 Tw = FNMS(KP250000000, Td, T6);
Chris@82 152 Ty = FMA(KP559016994, Tx, Tw);
Chris@82 153 T1I = FNMS(KP559016994, Tx, Tw);
Chris@82 154 TG = FNMS(KP951056516, TF, Ty);
Chris@82 155 T29 = FNMS(KP951056516, T1J, T1I);
Chris@82 156 T1u = FMA(KP951056516, TF, Ty);
Chris@82 157 T1K = FMA(KP951056516, T1J, T1I);
Chris@82 158 }
Chris@82 159 }
/* Cr and Ci inputs at indices 2, 3, 7, 8 and 12. */
Chris@82 160 {
Chris@82 161 E Tf, Tm, T1c, TU, T1b, TY, T19, T11;
Chris@82 162 Tf = Cr[WS(csr, 2)];
Chris@82 163 T14 = Ci[WS(csi, 2)];
Chris@82 164 {
Chris@82 165 E Tg, Th, Ti, Tj, Tk, Tl;
Chris@82 166 Tg = Cr[WS(csr, 7)];
Chris@82 167 Th = Cr[WS(csr, 3)];
Chris@82 168 Ti = Tg + Th;
Chris@82 169 Tj = Cr[WS(csr, 12)];
Chris@82 170 Tk = Cr[WS(csr, 8)];
Chris@82 171 Tl = Tj + Tk;
Chris@82 172 Tm = Ti + Tl;
Chris@82 173 T1c = Tj - Tk;
Chris@82 174 TU = Tl - Ti;
Chris@82 175 T1b = Th - Tg;
Chris@82 176 }
Chris@82 177 {
Chris@82 178 E TW, TX, T15, TZ, T10, T16;
Chris@82 179 TW = Ci[WS(csi, 7)];
Chris@82 180 TX = Ci[WS(csi, 3)];
Chris@82 181 T15 = TW - TX;
Chris@82 182 TZ = Ci[WS(csi, 12)];
Chris@82 183 T10 = Ci[WS(csi, 8)];
Chris@82 184 T16 = TZ - T10;
Chris@82 185 TY = TW + TX;
Chris@82 186 T19 = T16 - T15;
Chris@82 187 T11 = TZ + T10;
Chris@82 188 T17 = T15 + T16;
Chris@82 189 }
Chris@82 190 Tn = Tf + Tm;
Chris@82 191 {
Chris@82 192 E T1d, T1Q, T1a, T1P, T18;
Chris@82 193 T1d = FNMS(KP618033988, T1c, T1b);
Chris@82 194 T1Q = FMA(KP618033988, T1b, T1c);
Chris@82 195 T18 = FNMS(KP250000000, T17, T14);
Chris@82 196 T1a = FNMS(KP559016994, T19, T18);
Chris@82 197 T1P = FMA(KP559016994, T19, T18);
Chris@82 198 T1e = FNMS(KP951056516, T1d, T1a);
Chris@82 199 T26 = FMA(KP951056516, T1Q, T1P);
Chris@82 200 T1q = FMA(KP951056516, T1d, T1a);
Chris@82 201 T1R = FNMS(KP951056516, T1Q, T1P);
Chris@82 202 }
Chris@82 203 {
Chris@82 204 E T12, T1T, TV, T1S, TT;
Chris@82 205 T12 = FMA(KP618033988, T11, TY);
Chris@82 206 T1T = FNMS(KP618033988, TY, T11);
Chris@82 207 TT = FNMS(KP250000000, Tm, Tf);
Chris@82 208 TV = FNMS(KP559016994, TU, TT);
Chris@82 209 T1S = FMA(KP559016994, TU, TT);
Chris@82 210 T13 = FNMS(KP951056516, T12, TV);
Chris@82 211 T27 = FNMS(KP951056516, T1T, T1S);
Chris@82 212 T1r = FMA(KP951056516, T12, TV);
Chris@82 213 T1U = FMA(KP951056516, T1T, T1S);
Chris@82 214 }
Chris@82 215 }
/* Outputs R0[0], R1[7], R0[5], R1[2], R0[10] from T5, Te, Tn, TK+TH, T17+T14. */
Chris@82 216 {
Chris@82 217 E T2m, To, T2l, T2q, T2s, T2o, T2p, T2r, T2n;
Chris@82 218 T2m = Te - Tn;
Chris@82 219 To = Te + Tn;
Chris@82 220 T2l = FNMS(KP500000000, To, T5);
Chris@82 221 T2o = TK + TH;
Chris@82 222 T2p = T17 + T14;
Chris@82 223 T2q = FMA(KP618033988, T2p, T2o);
Chris@82 224 T2s = FNMS(KP618033988, T2o, T2p);
Chris@82 225 R0[0] = FMA(KP2_000000000, To, T5);
Chris@82 226 T2r = FNMS(KP1_118033988, T2m, T2l);
Chris@82 227 R1[WS(rs, 7)] = FNMS(KP1_902113032, T2s, T2r);
Chris@82 228 R0[WS(rs, 5)] = FMA(KP1_902113032, T2s, T2r);
Chris@82 229 T2n = FMA(KP1_118033988, T2m, T2l);
Chris@82 230 R1[WS(rs, 2)] = FNMS(KP1_902113032, T2q, T2n);
Chris@82 231 R0[WS(rs, 10)] = FMA(KP1_902113032, T2q, T2n);
Chris@82 232 }
/* Outputs R1[1], R0[9], R1[6], R0[4], R1[11] from T26, T27, T29, T2a, T1F, T1G. */
Chris@82 233 {
Chris@82 234 E T2i, T2k, T25, T2c, T2d, T2e, T2j, T2f;
Chris@82 235 {
Chris@82 236 E T2g, T2h, T28, T2b;
Chris@82 237 T2g = FMA(KP939062505, T29, T2a);
Chris@82 238 T2h = FMA(KP062914667, T26, T27);
Chris@82 239 T2i = FMA(KP846146756, T2h, T2g);
Chris@82 240 T2k = FNMS(KP451418159, T2g, T2h);
Chris@82 241 T25 = FMA(KP1_902113032, T1G, T1F);
Chris@82 242 T28 = FNMS(KP062914667, T27, T26);
Chris@82 243 T2b = FNMS(KP939062505, T2a, T29);
Chris@82 244 T2c = FNMS(KP730409924, T2b, T28);
Chris@82 245 T2d = FMA(KP499013364, T2c, T25);
Chris@82 246 T2e = FMA(KP730409924, T2b, T28);
Chris@82 247 }
Chris@82 248 R1[WS(rs, 1)] = FNMS(KP1_996053456, T2c, T25);
Chris@82 249 T2j = FNMS(KP1_115827804, T2e, T2d);
Chris@82 250 R0[WS(rs, 9)] = FNMS(KP1_898359647, T2k, T2j);
Chris@82 251 R1[WS(rs, 6)] = FMA(KP1_898359647, T2k, T2j);
Chris@82 252 T2f = FMA(KP1_115827804, T2e, T2d);
Chris@82 253 R0[WS(rs, 4)] = FNMS(KP1_386580726, T2i, T2f);
Chris@82 254 R1[WS(rs, 11)] = FMA(KP1_386580726, T2i, T2f);
Chris@82 255 }
/* Outputs R1[0], R0[8], R1[5], R0[3], R1[10] from TG, TR, T13, T1e, Tr, Tu. */
Chris@82 256 {
Chris@82 257 E T1m, T1o, Tv, T1g, T1h, T1i, T1n, T1j;
Chris@82 258 {
Chris@82 259 E T1k, T1l, TS, T1f;
Chris@82 260 T1k = FMA(KP256756360, TG, TR);
Chris@82 261 T1l = FMA(KP549754652, T13, T1e);
Chris@82 262 T1m = FMA(KP559154169, T1l, T1k);
Chris@82 263 T1o = FNMS(KP683113946, T1k, T1l);
Chris@82 264 Tv = FNMS(KP1_902113032, Tu, Tr);
Chris@82 265 TS = FNMS(KP256756360, TR, TG);
Chris@82 266 T1f = FNMS(KP549754652, T1e, T13);
Chris@82 267 T1g = FMA(KP904730450, T1f, TS);
Chris@82 268 T1h = FNMS(KP484291580, T1g, Tv);
Chris@82 269 T1i = FNMS(KP904730450, T1f, TS);
Chris@82 270 }
Chris@82 271 R1[0] = FMA(KP1_937166322, T1g, Tv);
Chris@82 272 T1n = FNMS(KP1_082908895, T1i, T1h);
Chris@82 273 R0[WS(rs, 8)] = FNMS(KP1_666834356, T1o, T1n);
Chris@82 274 R1[WS(rs, 5)] = FMA(KP1_666834356, T1o, T1n);
Chris@82 275 T1j = FMA(KP1_082908895, T1i, T1h);
Chris@82 276 R0[WS(rs, 3)] = FNMS(KP1_842354653, T1m, T1j);
Chris@82 277 R1[WS(rs, 10)] = FMA(KP1_842354653, T1m, T1j);
Chris@82 278 }
/* Outputs R0[2], R1[4], R0[12], R0[7], R1[9] from T1q, T1r, T1t, T1u, Tr, Tu. */
Chris@82 279 {
Chris@82 280 E T1C, T1E, T1p, T1w, T1x, T1y, T1D, T1z;
Chris@82 281 {
Chris@82 282 E T1A, T1B, T1s, T1v;
Chris@82 283 T1A = FNMS(KP470564281, T1q, T1r);
Chris@82 284 T1B = FMA(KP634619297, T1t, T1u);
Chris@82 285 T1C = FNMS(KP576710603, T1B, T1A);
Chris@82 286 T1E = FMA(KP662318342, T1A, T1B);
Chris@82 287 T1p = FMA(KP1_902113032, Tu, Tr);
Chris@82 288 T1s = FMA(KP470564281, T1r, T1q);
Chris@82 289 T1v = FNMS(KP634619297, T1u, T1t);
Chris@82 290 T1w = FMA(KP933137358, T1v, T1s);
Chris@82 291 T1x = FMA(KP452413526, T1w, T1p);
Chris@82 292 T1y = FNMS(KP933137358, T1v, T1s);
Chris@82 293 }
Chris@82 294 R0[WS(rs, 2)] = FNMS(KP1_809654104, T1w, T1p);
Chris@82 295 T1D = FMA(KP1_011627398, T1y, T1x);
Chris@82 296 R1[WS(rs, 4)] = FNMS(KP1_606007150, T1E, T1D);
Chris@82 297 R0[WS(rs, 12)] = FMA(KP1_606007150, T1E, T1D);
Chris@82 298 T1z = FNMS(KP1_011627398, T1y, T1x);
Chris@82 299 R0[WS(rs, 7)] = FMA(KP1_721083328, T1C, T1z);
Chris@82 300 R1[WS(rs, 9)] = FNMS(KP1_721083328, T1C, T1z);
Chris@82 301 }
/* Outputs R0[1], R0[6], R1[8], R1[3], R0[11] from T1K, T1N, T1R, T1U, T1F, T1G. */
Chris@82 302 {
Chris@82 303 E T22, T24, T1H, T1W, T1X, T1Y, T23, T1Z;
Chris@82 304 {
Chris@82 305 E T20, T21, T1O, T1V;
Chris@82 306 T20 = FMA(KP549754652, T1K, T1N);
Chris@82 307 T21 = FMA(KP634619297, T1R, T1U);
Chris@82 308 T22 = FMA(KP595480289, T21, T20);
Chris@82 309 T24 = FNMS(KP641441904, T20, T21);
Chris@82 310 T1H = FNMS(KP1_902113032, T1G, T1F);
Chris@82 311 T1O = FNMS(KP549754652, T1N, T1K);
Chris@82 312 T1V = FNMS(KP634619297, T1U, T1R);
Chris@82 313 T1W = FNMS(KP963507348, T1V, T1O);
Chris@82 314 T1X = FNMS(KP438153340, T1W, T1H);
Chris@82 315 T1Y = FMA(KP963507348, T1V, T1O);
Chris@82 316 }
Chris@82 317 R0[WS(rs, 1)] = FMA(KP1_752613360, T1W, T1H);
Chris@82 318 T23 = FNMS(KP979740652, T1Y, T1X);
Chris@82 319 R0[WS(rs, 6)] = FMA(KP1_606007150, T24, T23);
Chris@82 320 R1[WS(rs, 8)] = FNMS(KP1_606007150, T24, T23);
Chris@82 321 T1Z = FMA(KP979740652, T1Y, T1X);
Chris@82 322 R1[WS(rs, 3)] = FNMS(KP1_666834356, T22, T1Z);
Chris@82 323 R0[WS(rs, 11)] = FMA(KP1_666834356, T22, T1Z);
Chris@82 324 }
Chris@82 325 }
Chris@82 326 }
Chris@82 327 }
Chris@82 328
/*
 * Descriptor for the FMA variant of the codelet: first field 25
 * (presumably the transform size), name "r2cb_25". The counts
 * {32, 0, 120, 0} begin with the figures from the generator's summary
 * comment for this variant (32 additions, 0 multiplications, 120 fused
 * multiply/adds); the meaning of the fourth count is not visible here.
 */
Chris@82 329 static const kr2c_desc desc = { 25, "r2cb_25", {32, 0, 120, 0}, &GENUS };
Chris@82 330
/*
 * Registration entry point: hands the r2cb_25 kernel and its descriptor
 * to X(kr2c_register) for planner p. X() is a macro defined outside this
 * file.
 */
Chris@82 331 void X(codelet_r2cb_25) (planner *p) {
Chris@82 332 X(kr2c_register) (p, r2cb_25, &desc);
Chris@82 333 }
Chris@82 334
Chris@82 335 #else
Chris@82 336
Chris@82 337 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 25 -name r2cb_25 -include rdft/scalar/r2cb.h */
Chris@82 338
Chris@82 339 /*
Chris@82 340 * This function contains 152 FP additions, 98 FP multiplications,
Chris@82 341 * (or, 100 additions, 46 multiplications, 52 fused multiply/add),
Chris@82 342 * 65 stack variables, 21 constants, and 50 memory accesses
Chris@82 343 */
Chris@82 344 #include "rdft/scalar/r2cb.h"
Chris@82 345
/*
 * r2cb_25 (non-FMA variant): kernel emitted by genfft (gen_r2cb with
 * -n 25, -sign 1, no -fma); compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each loop iteration reads 25 inputs -- Cr[0], Cr[WS(csr, k)] and
 * Ci[WS(csi, k)] for k = 1..12 (Ci[0] is never read) -- and writes 25
 * outputs -- R0[0], R0[WS(rs, k)] for k = 1..12, R1[0] and
 * R1[WS(rs, k)] for k = 1..11.
 *
 * Parameters:
 *   R0, R1 - output arrays, indexed through stride rs
 *   Cr, Ci - input arrays, indexed through strides csr and csi
 *            (presumably the real and imaginary parts of the input;
 *            confirm against the codelet headers)
 *   v      - number of loop iterations to perform
 *   ivs    - offset added to Cr and Ci after each iteration
 *   ovs    - offset added to R0 and R1 after each iteration
 *
 * NOTE(review): R0/R1 look like the even-/odd-indexed real outputs of
 * FFTW's r2c codelet convention; that is not established in this file.
 * The body is machine-generated straight-line code: regenerate it rather
 * than editing by hand.
 */
Chris@82 346 static void r2cb_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@82 347 {
/*
 * Constants declared through the DK macro (defined outside this file).
 * Some are numerically recognizable: KP559016994 ~ sqrt(5)/4,
 * KP951056516 ~ sin(2*pi/5), KP587785252 ~ sin(pi/5).
 */
Chris@82 348 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 349 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 350 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 351 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 352 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 353 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 354 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 355 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 356 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 357 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 358 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 359 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 360 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 361 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 362 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 363 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 364 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 365 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 366 DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
Chris@82 367 DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
Chris@82 368 DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
Chris@82 369 {
Chris@82 370 INT i;
/*
 * Runs v times; after each pass the output pointers advance by ovs and
 * the input pointers by ivs. MAKE_VOLATILE_STRIDE is a macro from outside
 * this file (its purpose is not visible here).
 */
Chris@82 371 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) {
Chris@82 372 E Tu, T1G, T5, Tr, T1F, TN, TO, Te, TR, T27, T1r, T1N, TG, T26, T1q;
Chris@82 373 E T1K, T1a, T1b, Tn, T1e, T2a, T1u, T1U, T13, T29, T1t, T1R, Ts, Tt;
/* Ci inputs at indices 5 and 10, combined into Tu and T1G. */
Chris@82 374 Ts = Ci[WS(csi, 5)];
Chris@82 375 Tt = Ci[WS(csi, 10)];
Chris@82 376 Tu = FMA(KP1_902113032, Ts, KP1_175570504 * Tt);
Chris@82 377 T1G = FNMS(KP1_902113032, Tt, KP1_175570504 * Ts);
/* Cr inputs at indices 0, 5 and 10, combined into T5, Tr and T1F. */
Chris@82 378 {
Chris@82 379 E T1, T4, Tp, T2, T3, Tq;
Chris@82 380 T1 = Cr[0];
Chris@82 381 T2 = Cr[WS(csr, 5)];
Chris@82 382 T3 = Cr[WS(csr, 10)];
Chris@82 383 T4 = T2 + T3;
Chris@82 384 Tp = KP1_118033988 * (T2 - T3);
Chris@82 385 T5 = FMA(KP2_000000000, T4, T1);
Chris@82 386 Tq = FNMS(KP500000000, T4, T1);
Chris@82 387 Tr = Tp + Tq;
Chris@82 388 T1F = Tq - Tp;
Chris@82 389 }
/* Cr and Ci inputs at indices 1, 4, 6, 9 and 11. */
Chris@82 390 {
Chris@82 391 E T6, Td, TI, Tw, TH, TB, TE, TM;
Chris@82 392 T6 = Cr[WS(csr, 1)];
Chris@82 393 TN = Ci[WS(csi, 1)];
Chris@82 394 {
Chris@82 395 E T7, T8, T9, Ta, Tb, Tc;
Chris@82 396 T7 = Cr[WS(csr, 6)];
Chris@82 397 T8 = Cr[WS(csr, 4)];
Chris@82 398 T9 = T7 + T8;
Chris@82 399 Ta = Cr[WS(csr, 11)];
Chris@82 400 Tb = Cr[WS(csr, 9)];
Chris@82 401 Tc = Ta + Tb;
Chris@82 402 Td = T9 + Tc;
Chris@82 403 TI = Ta - Tb;
Chris@82 404 Tw = KP559016994 * (T9 - Tc);
Chris@82 405 TH = T7 - T8;
Chris@82 406 }
Chris@82 407 {
Chris@82 408 E Tz, TA, TK, TC, TD, TL;
Chris@82 409 Tz = Ci[WS(csi, 6)];
Chris@82 410 TA = Ci[WS(csi, 4)];
Chris@82 411 TK = Tz - TA;
Chris@82 412 TC = Ci[WS(csi, 11)];
Chris@82 413 TD = Ci[WS(csi, 9)];
Chris@82 414 TL = TC - TD;
Chris@82 415 TB = Tz + TA;
Chris@82 416 TO = TK + TL;
Chris@82 417 TE = TC + TD;
Chris@82 418 TM = KP559016994 * (TK - TL);
Chris@82 419 }
Chris@82 420 Te = T6 + Td;
Chris@82 421 {
Chris@82 422 E TJ, T1L, TQ, T1M, TP;
Chris@82 423 TJ = FMA(KP951056516, TH, KP587785252 * TI);
Chris@82 424 T1L = FNMS(KP951056516, TI, KP587785252 * TH);
Chris@82 425 TP = FNMS(KP250000000, TO, TN);
Chris@82 426 TQ = TM + TP;
Chris@82 427 T1M = TP - TM;
Chris@82 428 TR = TJ + TQ;
Chris@82 429 T27 = T1M - T1L;
Chris@82 430 T1r = TQ - TJ;
Chris@82 431 T1N = T1L + T1M;
Chris@82 432 }
Chris@82 433 {
Chris@82 434 E TF, T1J, Ty, T1I, Tx;
Chris@82 435 TF = FMA(KP951056516, TB, KP587785252 * TE);
Chris@82 436 T1J = FNMS(KP951056516, TE, KP587785252 * TB);
Chris@82 437 Tx = FNMS(KP250000000, Td, T6);
Chris@82 438 Ty = Tw + Tx;
Chris@82 439 T1I = Tx - Tw;
Chris@82 440 TG = Ty - TF;
Chris@82 441 T26 = T1I + T1J;
Chris@82 442 T1q = Ty + TF;
Chris@82 443 T1K = T1I - T1J;
Chris@82 444 }
Chris@82 445 }
/* Cr and Ci inputs at indices 2, 3, 7, 8 and 12. */
Chris@82 446 {
Chris@82 447 E Tf, Tm, T15, TT, T14, TY, T11, T19;
Chris@82 448 Tf = Cr[WS(csr, 2)];
Chris@82 449 T1a = Ci[WS(csi, 2)];
Chris@82 450 {
Chris@82 451 E Tg, Th, Ti, Tj, Tk, Tl;
Chris@82 452 Tg = Cr[WS(csr, 7)];
Chris@82 453 Th = Cr[WS(csr, 3)];
Chris@82 454 Ti = Tg + Th;
Chris@82 455 Tj = Cr[WS(csr, 12)];
Chris@82 456 Tk = Cr[WS(csr, 8)];
Chris@82 457 Tl = Tj + Tk;
Chris@82 458 Tm = Ti + Tl;
Chris@82 459 T15 = Tj - Tk;
Chris@82 460 TT = KP559016994 * (Ti - Tl);
Chris@82 461 T14 = Tg - Th;
Chris@82 462 }
Chris@82 463 {
Chris@82 464 E TW, TX, T17, TZ, T10, T18;
Chris@82 465 TW = Ci[WS(csi, 7)];
Chris@82 466 TX = Ci[WS(csi, 3)];
Chris@82 467 T17 = TW - TX;
Chris@82 468 TZ = Ci[WS(csi, 12)];
Chris@82 469 T10 = Ci[WS(csi, 8)];
Chris@82 470 T18 = TZ - T10;
Chris@82 471 TY = TW + TX;
Chris@82 472 T1b = T17 + T18;
Chris@82 473 T11 = TZ + T10;
Chris@82 474 T19 = KP559016994 * (T17 - T18);
Chris@82 475 }
Chris@82 476 Tn = Tf + Tm;
Chris@82 477 {
Chris@82 478 E T16, T1S, T1d, T1T, T1c;
Chris@82 479 T16 = FMA(KP951056516, T14, KP587785252 * T15);
Chris@82 480 T1S = FNMS(KP951056516, T15, KP587785252 * T14);
Chris@82 481 T1c = FNMS(KP250000000, T1b, T1a);
Chris@82 482 T1d = T19 + T1c;
Chris@82 483 T1T = T1c - T19;
Chris@82 484 T1e = T16 + T1d;
Chris@82 485 T2a = T1T - T1S;
Chris@82 486 T1u = T1d - T16;
Chris@82 487 T1U = T1S + T1T;
Chris@82 488 }
Chris@82 489 {
Chris@82 490 E T12, T1Q, TV, T1P, TU;
Chris@82 491 T12 = FMA(KP951056516, TY, KP587785252 * T11);
Chris@82 492 T1Q = FNMS(KP951056516, T11, KP587785252 * TY);
Chris@82 493 TU = FNMS(KP250000000, Tm, Tf);
Chris@82 494 TV = TT + TU;
Chris@82 495 T1P = TU - TT;
Chris@82 496 T13 = TV - T12;
Chris@82 497 T29 = T1P + T1Q;
Chris@82 498 T1t = TV + T12;
Chris@82 499 T1R = T1P - T1Q;
Chris@82 500 }
Chris@82 501 }
/* Outputs R0[0], R1[2], R0[10], R0[5], R1[7] from T5, Te, Tn, TO+TN, T1b+T1a. */
Chris@82 502 {
Chris@82 503 E T2m, To, T2l, T2q, T2s, T2o, T2p, T2r, T2n;
Chris@82 504 T2m = KP1_118033988 * (Te - Tn);
Chris@82 505 To = Te + Tn;
Chris@82 506 T2l = FNMS(KP500000000, To, T5);
Chris@82 507 T2o = TO + TN;
Chris@82 508 T2p = T1b + T1a;
Chris@82 509 T2q = FNMS(KP1_902113032, T2p, KP1_175570504 * T2o);
Chris@82 510 T2s = FMA(KP1_902113032, T2o, KP1_175570504 * T2p);
Chris@82 511 R0[0] = FMA(KP2_000000000, To, T5);
Chris@82 512 T2r = T2m + T2l;
Chris@82 513 R1[WS(rs, 2)] = T2r - T2s;
Chris@82 514 R0[WS(rs, 10)] = T2r + T2s;
Chris@82 515 T2n = T2l - T2m;
Chris@82 516 R0[WS(rs, 5)] = T2n - T2q;
Chris@82 517 R1[WS(rs, 7)] = T2n + T2q;
Chris@82 518 }
/* Outputs R1[1], R0[4], R1[11], R1[6], R0[9] from T26, T27, T29, T2a, T1F, T1G. */
Chris@82 519 {
Chris@82 520 E T2i, T2k, T25, T2c, T2d, T2e, T2j, T2f;
Chris@82 521 {
Chris@82 522 E T2g, T2h, T28, T2b;
Chris@82 523 T2g = FMA(KP684547105, T26, KP728968627 * T27);
Chris@82 524 T2h = FMA(KP998026728, T29, KP062790519 * T2a);
Chris@82 525 T2i = FNMS(KP1_902113032, T2h, KP1_175570504 * T2g);
Chris@82 526 T2k = FMA(KP1_902113032, T2g, KP1_175570504 * T2h);
Chris@82 527 T25 = T1F + T1G;
Chris@82 528 T28 = FNMS(KP684547105, T27, KP728968627 * T26);
Chris@82 529 T2b = FNMS(KP998026728, T2a, KP062790519 * T29);
Chris@82 530 T2c = T28 + T2b;
Chris@82 531 T2d = FNMS(KP500000000, T2c, T25);
Chris@82 532 T2e = KP1_118033988 * (T28 - T2b);
Chris@82 533 }
Chris@82 534 R1[WS(rs, 1)] = FMA(KP2_000000000, T2c, T25);
Chris@82 535 T2j = T2e + T2d;
Chris@82 536 R0[WS(rs, 4)] = T2j - T2k;
Chris@82 537 R1[WS(rs, 11)] = T2j + T2k;
Chris@82 538 T2f = T2d - T2e;
Chris@82 539 R1[WS(rs, 6)] = T2f - T2i;
Chris@82 540 R0[WS(rs, 9)] = T2f + T2i;
Chris@82 541 }
/* Outputs R1[0], R0[3], R1[10], R1[5], R0[8] from TG, TR, T13, T1e, Tr, Tu. */
Chris@82 542 {
Chris@82 543 E T1m, T1o, Tv, T1g, T1h, T1i, T1n, T1j;
Chris@82 544 {
Chris@82 545 E T1k, T1l, TS, T1f;
Chris@82 546 T1k = FMA(KP248689887, TG, KP968583161 * TR);
Chris@82 547 T1l = FMA(KP481753674, T13, KP876306680 * T1e);
Chris@82 548 T1m = FNMS(KP1_902113032, T1l, KP1_175570504 * T1k);
Chris@82 549 T1o = FMA(KP1_902113032, T1k, KP1_175570504 * T1l);
Chris@82 550 Tv = Tr - Tu;
Chris@82 551 TS = FNMS(KP248689887, TR, KP968583161 * TG);
Chris@82 552 T1f = FNMS(KP481753674, T1e, KP876306680 * T13);
Chris@82 553 T1g = TS + T1f;
Chris@82 554 T1h = FNMS(KP500000000, T1g, Tv);
Chris@82 555 T1i = KP1_118033988 * (TS - T1f);
Chris@82 556 }
Chris@82 557 R1[0] = FMA(KP2_000000000, T1g, Tv);
Chris@82 558 T1n = T1i + T1h;
Chris@82 559 R0[WS(rs, 3)] = T1n - T1o;
Chris@82 560 R1[WS(rs, 10)] = T1n + T1o;
Chris@82 561 T1j = T1h - T1i;
Chris@82 562 R1[WS(rs, 5)] = T1j - T1m;
Chris@82 563 R0[WS(rs, 8)] = T1j + T1m;
Chris@82 564 }
/* Outputs R0[2], R1[4], R0[12], R0[7], R1[9] from T1q, T1r, T1t, T1u, Tr, Tu. */
Chris@82 565 {
Chris@82 566 E T1C, T1E, T1p, T1w, T1x, T1y, T1D, T1z;
Chris@82 567 {
Chris@82 568 E T1A, T1B, T1s, T1v;
Chris@82 569 T1A = FMA(KP844327925, T1q, KP535826794 * T1r);
Chris@82 570 T1B = FNMS(KP425779291, T1u, KP904827052 * T1t);
Chris@82 571 T1C = FNMS(KP1_902113032, T1B, KP1_175570504 * T1A);
Chris@82 572 T1E = FMA(KP1_902113032, T1A, KP1_175570504 * T1B);
Chris@82 573 T1p = Tr + Tu;
Chris@82 574 T1s = FNMS(KP844327925, T1r, KP535826794 * T1q);
Chris@82 575 T1v = FMA(KP425779291, T1t, KP904827052 * T1u);
Chris@82 576 T1w = T1s - T1v;
Chris@82 577 T1x = FNMS(KP500000000, T1w, T1p);
Chris@82 578 T1y = KP1_118033988 * (T1s + T1v);
Chris@82 579 }
Chris@82 580 R0[WS(rs, 2)] = FMA(KP2_000000000, T1w, T1p);
Chris@82 581 T1D = T1x + T1y;
Chris@82 582 R1[WS(rs, 4)] = T1D - T1E;
Chris@82 583 R0[WS(rs, 12)] = T1E + T1D;
Chris@82 584 T1z = T1x - T1y;
Chris@82 585 R0[WS(rs, 7)] = T1z - T1C;
Chris@82 586 R1[WS(rs, 9)] = T1C + T1z;
Chris@82 587 }
/* Outputs R0[1], R1[3], R0[11], R0[6], R1[8] from T1K, T1N, T1R, T1U, T1F, T1G. */
Chris@82 588 {
Chris@82 589 E T22, T24, T1H, T1W, T1X, T1Y, T23, T1Z;
Chris@82 590 {
Chris@82 591 E T20, T21, T1O, T1V;
Chris@82 592 T20 = FMA(KP481753674, T1K, KP876306680 * T1N);
Chris@82 593 T21 = FMA(KP844327925, T1R, KP535826794 * T1U);
Chris@82 594 T22 = FNMS(KP1_902113032, T21, KP1_175570504 * T20);
Chris@82 595 T24 = FMA(KP1_902113032, T20, KP1_175570504 * T21);
Chris@82 596 T1H = T1F - T1G;
Chris@82 597 T1O = FNMS(KP481753674, T1N, KP876306680 * T1K);
Chris@82 598 T1V = FNMS(KP844327925, T1U, KP535826794 * T1R);
Chris@82 599 T1W = T1O + T1V;
Chris@82 600 T1X = FNMS(KP500000000, T1W, T1H);
Chris@82 601 T1Y = KP1_118033988 * (T1O - T1V);
Chris@82 602 }
Chris@82 603 R0[WS(rs, 1)] = FMA(KP2_000000000, T1W, T1H);
Chris@82 604 T23 = T1Y + T1X;
Chris@82 605 R1[WS(rs, 3)] = T23 - T24;
Chris@82 606 R0[WS(rs, 11)] = T23 + T24;
Chris@82 607 T1Z = T1X - T1Y;
Chris@82 608 R0[WS(rs, 6)] = T1Z - T22;
Chris@82 609 R1[WS(rs, 8)] = T1Z + T22;
Chris@82 610 }
Chris@82 611 }
Chris@82 612 }
Chris@82 613 }
Chris@82 614
/*
 * Descriptor for the non-FMA variant of the codelet: first field 25
 * (presumably the transform size), name "r2cb_25". The counts
 * {100, 46, 52, 0} begin with the figures from the generator's summary
 * comment for this variant (100 additions, 46 multiplications, 52 fused
 * multiply/adds); the meaning of the fourth count is not visible here.
 */
Chris@82 615 static const kr2c_desc desc = { 25, "r2cb_25", {100, 46, 52, 0}, &GENUS };
Chris@82 616
/*
 * Registration entry point: hands the r2cb_25 kernel and its descriptor
 * to X(kr2c_register) for planner p. X() is a macro defined outside this
 * file.
 */
Chris@82 617 void X(codelet_r2cb_25) (planner *p) {
Chris@82 618 X(kr2c_register) (p, r2cb_25, &desc);
Chris@82 619 }
Chris@82 620
Chris@82 621 #endif