annotate src/fftw-3.3.3/dft/scalar/codelets/n1_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:35:46 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 25 -name n1_25 -include n.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 352 FP additions, 268 FP multiplications,
Chris@10 32 * (or, 84 additions, 0 multiplications, 268 fused multiply/add),
Chris@10 33 * 164 stack variables, 47 constants, and 100 memory accesses
Chris@10 34 */
Chris@10 35 #include "n.h"
Chris@10 36
/*
 * n1_25, HAVE_FMA variant.  This is generator output (see the "Generated by"
 * comment above: gen_notw with -n 25); regenerate it rather than editing by
 * hand.  The loop body is straight-line arithmetic with no branches.
 *
 * Each pass of the loop reads ri[WS(is, k)] and ii[WS(is, k)] for every
 * k in 0..24 and writes ro[WS(os, k)] and io[WS(os, k)] for every k in 0..24.
 *
 * ri, ii  input arrays (presumably the real and imaginary parts -- confirm
 *         against codelet-dft.h)
 * ro, io  output arrays (presumably real and imaginary parts likewise)
 * is, os  strides handed to WS() for input and output indexing
 * v       number of loop passes; nothing is read or written when v <= 0
 * ivs     added to ri and ii after every pass
 * ovs     added to ro and io after every pass
 *
 * DK, E, R, INT, stride, WS, FMA, FMS, FNMS and MAKE_VOLATILE_STRIDE are not
 * defined in this file; presumably they come from the included headers.
 */
Chris@10 37 static void n1_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 38 {
/* Constants: each KPddddddddd name spells the first nine decimals of its value. */
Chris@10 39 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@10 40 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@10 41 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@10 42 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@10 43 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@10 44 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 45 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@10 46 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@10 47 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@10 48 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@10 49 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@10 50 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@10 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@10 52 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@10 53 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@10 54 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@10 55 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@10 56 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@10 57 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@10 58 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@10 59 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@10 60 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@10 61 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@10 62 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 63 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@10 64 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@10 65 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@10 66 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@10 67 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@10 68 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@10 69 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@10 70 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@10 71 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@10 72 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@10 73 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@10 74 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 75 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@10 76 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@10 77 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@10 78 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@10 79 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@10 80 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@10 81 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@10 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@10 86 {
Chris@10 87 INT i;
/* v passes; the loop header advances ri/ii by ivs and ro/io by ovs each time. */
Chris@10 88 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(100, is), MAKE_VOLATILE_STRIDE(100, os)) {
/* These temporaries are assigned inside the nested block below and consumed by the stores after it closes. */
Chris@10 89 E T3Y, T3U, T3W, T42, T44, T3X, T3R, T3V, T3Z, T43;
Chris@10 90 {
Chris@10 91 E T4Q, T1U, T9, T3b, T45, T3e, T46, T1D, T4P, T1R, Ts, T1K, T18, T1E, T4z;
Chris@10 92 E T5f, T3z, T22, T4s, T5b, T3C, T2o, T3D, T2h, T4p, T5c, T4w, T5e, T3A, T29;
Chris@10 93 E T2z, T2y, TL, T1L, T1r, T1F, T4a, T57, T3v, T2x, T4k, T55, T3s, T2T, T2D;
Chris@10 94 E T4c, T3t, T2M, T4h, T54, T1v, T1C, T1Q;
Chris@10 95 {
/* Loads of inputs 0, 5, 10, 15, 20 (ri here, ii just below). */
Chris@10 96 E T1, T2, T3, T5, T6;
Chris@10 97 T1 = ri[0];
Chris@10 98 T2 = ri[WS(is, 5)];
Chris@10 99 T3 = ri[WS(is, 20)];
Chris@10 100 T5 = ri[WS(is, 10)];
Chris@10 101 T6 = ri[WS(is, 15)];
Chris@10 102 {
Chris@10 103 E T3a, T3c, T1y, T1z, T1A, T39, T4, T1S, T1B, T3d;
Chris@10 104 T1v = ii[0];
Chris@10 105 T4 = T2 + T3;
Chris@10 106 T1S = T2 - T3;
Chris@10 107 {
Chris@10 108 E T7, T1T, T8, T1w, T1x;
Chris@10 109 T7 = T5 + T6;
Chris@10 110 T1T = T5 - T6;
Chris@10 111 T1w = ii[WS(is, 5)];
Chris@10 112 T1x = ii[WS(is, 20)];
Chris@10 113 T4Q = FNMS(KP618033988, T1S, T1T);
Chris@10 114 T1U = FMA(KP618033988, T1T, T1S);
Chris@10 115 T8 = T4 + T7;
Chris@10 116 T3a = T4 - T7;
Chris@10 117 T3c = T1w - T1x;
Chris@10 118 T1y = T1w + T1x;
Chris@10 119 T1z = ii[WS(is, 10)];
Chris@10 120 T1A = ii[WS(is, 15)];
Chris@10 121 T39 = FNMS(KP250000000, T8, T1);
Chris@10 122 T9 = T1 + T8;
Chris@10 123 }
Chris@10 124 T1B = T1z + T1A;
Chris@10 125 T3d = T1z - T1A;
Chris@10 126 T3b = FMA(KP559016994, T3a, T39);
Chris@10 127 T45 = FNMS(KP559016994, T3a, T39);
Chris@10 128 T3e = FMA(KP618033988, T3d, T3c);
Chris@10 129 T46 = FNMS(KP618033988, T3c, T3d);
Chris@10 130 T1C = T1y + T1B;
Chris@10 131 T1Q = T1y - T1B;
Chris@10 132 }
Chris@10 133 }
Chris@10 134 {
Chris@10 135 E T24, T23, T28, T4v;
Chris@10 136 {
/* Loads of inputs 1, 6, 11, 16, 21 and 4, 9, 14, 19, 24. */
Chris@10 137 E Ta, TQ, Tj, TZ, T1Z, T20, Th, T26, T27, T1X, TX, T2l, T2m, Tq, T2c;
Chris@10 138 E T2e, T12, T15, T2f, T1P, TT, TW;
Chris@10 139 Ta = ri[WS(is, 1)];
Chris@10 140 T1P = FNMS(KP250000000, T1C, T1v);
Chris@10 141 T1D = T1v + T1C;
Chris@10 142 TQ = ii[WS(is, 1)];
Chris@10 143 Tj = ri[WS(is, 4)];
Chris@10 144 T4P = FNMS(KP559016994, T1Q, T1P);
Chris@10 145 T1R = FMA(KP559016994, T1Q, T1P);
Chris@10 146 TZ = ii[WS(is, 4)];
Chris@10 147 {
Chris@10 148 E Tb, Tc, Te, Tf;
Chris@10 149 Tb = ri[WS(is, 6)];
Chris@10 150 Tc = ri[WS(is, 21)];
Chris@10 151 Te = ri[WS(is, 11)];
Chris@10 152 Tf = ri[WS(is, 16)];
Chris@10 153 {
Chris@10 154 E TR, Td, Tg, TS, TU, TV;
Chris@10 155 TR = ii[WS(is, 6)];
Chris@10 156 T1Z = Tc - Tb;
Chris@10 157 Td = Tb + Tc;
Chris@10 158 T20 = Tf - Te;
Chris@10 159 Tg = Te + Tf;
Chris@10 160 TS = ii[WS(is, 21)];
Chris@10 161 TU = ii[WS(is, 11)];
Chris@10 162 TV = ii[WS(is, 16)];
Chris@10 163 Th = Td + Tg;
Chris@10 164 T24 = Td - Tg;
Chris@10 165 T26 = TR - TS;
Chris@10 166 TT = TR + TS;
Chris@10 167 TW = TU + TV;
Chris@10 168 T27 = TV - TU;
Chris@10 169 }
Chris@10 170 }
Chris@10 171 {
Chris@10 172 E Tk, Tl, Tn, To;
Chris@10 173 Tk = ri[WS(is, 9)];
Chris@10 174 T1X = TT - TW;
Chris@10 175 TX = TT + TW;
Chris@10 176 Tl = ri[WS(is, 24)];
Chris@10 177 Tn = ri[WS(is, 14)];
Chris@10 178 To = ri[WS(is, 19)];
Chris@10 179 {
Chris@10 180 E T10, Tm, Tp, T11, T13, T14;
Chris@10 181 T10 = ii[WS(is, 9)];
Chris@10 182 T2l = Tl - Tk;
Chris@10 183 Tm = Tk + Tl;
Chris@10 184 T2m = To - Tn;
Chris@10 185 Tp = Tn + To;
Chris@10 186 T11 = ii[WS(is, 24)];
Chris@10 187 T13 = ii[WS(is, 14)];
Chris@10 188 T14 = ii[WS(is, 19)];
Chris@10 189 Tq = Tm + Tp;
Chris@10 190 T2c = Tm - Tp;
Chris@10 191 T2e = T11 - T10;
Chris@10 192 T12 = T10 + T11;
Chris@10 193 T15 = T13 + T14;
Chris@10 194 T2f = T14 - T13;
Chris@10 195 }
Chris@10 196 }
Chris@10 197 {
Chris@10 198 E T2j, T2b, T1W, T21, T4y, T2i;
Chris@10 199 {
Chris@10 200 E Ti, T16, Tr, TY, T17;
Chris@10 201 T23 = FNMS(KP250000000, Th, Ta);
Chris@10 202 Ti = Ta + Th;
Chris@10 203 T2j = T15 - T12;
Chris@10 204 T16 = T12 + T15;
Chris@10 205 Tr = Tj + Tq;
Chris@10 206 T2b = FMS(KP250000000, Tq, Tj);
Chris@10 207 T1W = FNMS(KP250000000, TX, TQ);
Chris@10 208 TY = TQ + TX;
Chris@10 209 T21 = FMA(KP618033988, T20, T1Z);
Chris@10 210 T4y = FNMS(KP618033988, T1Z, T20);
Chris@10 211 T2i = FNMS(KP250000000, T16, TZ);
Chris@10 212 T17 = TZ + T16;
Chris@10 213 Ts = Ti + Tr;
Chris@10 214 T1K = Ti - Tr;
Chris@10 215 T18 = TY - T17;
Chris@10 216 T1E = TY + T17;
Chris@10 217 }
Chris@10 218 {
Chris@10 219 E T2n, T4r, T4x, T1Y;
Chris@10 220 T2n = FMA(KP618033988, T2m, T2l);
Chris@10 221 T4r = FNMS(KP618033988, T2l, T2m);
Chris@10 222 T4x = FNMS(KP559016994, T1X, T1W);
Chris@10 223 T1Y = FMA(KP559016994, T1X, T1W);
Chris@10 224 {
Chris@10 225 E T4o, T2g, T2d, T4n, T4q, T2k;
Chris@10 226 T4o = FNMS(KP618033988, T2e, T2f);
Chris@10 227 T2g = FMA(KP618033988, T2f, T2e);
Chris@10 228 T4z = FMA(KP951056516, T4y, T4x);
Chris@10 229 T5f = FNMS(KP951056516, T4y, T4x);
Chris@10 230 T3z = FNMS(KP951056516, T21, T1Y);
Chris@10 231 T22 = FMA(KP951056516, T21, T1Y);
Chris@10 232 T4q = FMA(KP559016994, T2j, T2i);
Chris@10 233 T2k = FNMS(KP559016994, T2j, T2i);
Chris@10 234 T4s = FMA(KP951056516, T4r, T4q);
Chris@10 235 T5b = FNMS(KP951056516, T4r, T4q);
Chris@10 236 T3C = FNMS(KP951056516, T2n, T2k);
Chris@10 237 T2o = FMA(KP951056516, T2n, T2k);
Chris@10 238 T2d = FNMS(KP559016994, T2c, T2b);
Chris@10 239 T4n = FMA(KP559016994, T2c, T2b);
Chris@10 240 T28 = FNMS(KP618033988, T27, T26);
Chris@10 241 T4v = FMA(KP618033988, T26, T27);
Chris@10 242 T3D = FNMS(KP951056516, T2g, T2d);
Chris@10 243 T2h = FMA(KP951056516, T2g, T2d);
Chris@10 244 T4p = FMA(KP951056516, T4o, T4n);
Chris@10 245 T5c = FNMS(KP951056516, T4o, T4n);
Chris@10 246 }
Chris@10 247 }
Chris@10 248 }
Chris@10 249 }
Chris@10 250 {
/* Loads of inputs 2, 7, 12, 17, 22 and 3, 8, 13, 18, 23. */
Chris@10 251 E Tt, T19, TC, T1i, T2u, T2v, TA, T2B, T2C, T2s, T1g, T2J, T2K, TJ, T2O;
Chris@10 252 E T2Q, T1l, T1o, T2R;
Chris@10 253 {
Chris@10 254 E T4u, T25, T1c, T1f;
Chris@10 255 Tt = ri[WS(is, 2)];
Chris@10 256 T19 = ii[WS(is, 2)];
Chris@10 257 TC = ri[WS(is, 3)];
Chris@10 258 T4u = FNMS(KP559016994, T24, T23);
Chris@10 259 T25 = FMA(KP559016994, T24, T23);
Chris@10 260 T1i = ii[WS(is, 3)];
Chris@10 261 {
Chris@10 262 E Tu, Tv, Tx, Ty;
Chris@10 263 Tu = ri[WS(is, 7)];
Chris@10 264 T4w = FNMS(KP951056516, T4v, T4u);
Chris@10 265 T5e = FMA(KP951056516, T4v, T4u);
Chris@10 266 T3A = FNMS(KP951056516, T28, T25);
Chris@10 267 T29 = FMA(KP951056516, T28, T25);
Chris@10 268 Tv = ri[WS(is, 22)];
Chris@10 269 Tx = ri[WS(is, 12)];
Chris@10 270 Ty = ri[WS(is, 17)];
Chris@10 271 {
Chris@10 272 E T1a, Tw, Tz, T1b, T1d, T1e;
Chris@10 273 T1a = ii[WS(is, 7)];
Chris@10 274 T2u = Tv - Tu;
Chris@10 275 Tw = Tu + Tv;
Chris@10 276 T2v = Ty - Tx;
Chris@10 277 Tz = Tx + Ty;
Chris@10 278 T1b = ii[WS(is, 22)];
Chris@10 279 T1d = ii[WS(is, 12)];
Chris@10 280 T1e = ii[WS(is, 17)];
Chris@10 281 TA = Tw + Tz;
Chris@10 282 T2z = Tz - Tw;
Chris@10 283 T2B = T1b - T1a;
Chris@10 284 T1c = T1a + T1b;
Chris@10 285 T1f = T1d + T1e;
Chris@10 286 T2C = T1d - T1e;
Chris@10 287 }
Chris@10 288 }
Chris@10 289 {
Chris@10 290 E TD, TE, TG, TH;
Chris@10 291 TD = ri[WS(is, 8)];
Chris@10 292 T2s = T1f - T1c;
Chris@10 293 T1g = T1c + T1f;
Chris@10 294 TE = ri[WS(is, 23)];
Chris@10 295 TG = ri[WS(is, 13)];
Chris@10 296 TH = ri[WS(is, 18)];
Chris@10 297 {
Chris@10 298 E T1j, TF, TI, T1k, T1m, T1n;
Chris@10 299 T1j = ii[WS(is, 8)];
Chris@10 300 T2J = TD - TE;
Chris@10 301 TF = TD + TE;
Chris@10 302 T2K = TG - TH;
Chris@10 303 TI = TG + TH;
Chris@10 304 T1k = ii[WS(is, 23)];
Chris@10 305 T1m = ii[WS(is, 13)];
Chris@10 306 T1n = ii[WS(is, 18)];
Chris@10 307 TJ = TF + TI;
Chris@10 308 T2O = TI - TF;
Chris@10 309 T2Q = T1k - T1j;
Chris@10 310 T1l = T1j + T1k;
Chris@10 311 T1o = T1m + T1n;
Chris@10 312 T2R = T1n - T1m;
Chris@10 313 }
Chris@10 314 }
Chris@10 315 }
Chris@10 316 {
Chris@10 317 E T2H, T2N, T2r, T2w, T49, T2G;
Chris@10 318 {
Chris@10 319 E TB, T1p, TK, T1h, T1q;
Chris@10 320 T2y = FNMS(KP250000000, TA, Tt);
Chris@10 321 TB = Tt + TA;
Chris@10 322 T2H = T1o - T1l;
Chris@10 323 T1p = T1l + T1o;
Chris@10 324 TK = TC + TJ;
Chris@10 325 T2N = FNMS(KP250000000, TJ, TC);
Chris@10 326 T2r = FNMS(KP250000000, T1g, T19);
Chris@10 327 T1h = T19 + T1g;
Chris@10 328 T2w = FMA(KP618033988, T2v, T2u);
Chris@10 329 T49 = FNMS(KP618033988, T2u, T2v);
Chris@10 330 T2G = FNMS(KP250000000, T1p, T1i);
Chris@10 331 T1q = T1i + T1p;
Chris@10 332 TL = TB + TK;
Chris@10 333 T1L = TB - TK;
Chris@10 334 T1r = T1h - T1q;
Chris@10 335 T1F = T1h + T1q;
Chris@10 336 }
Chris@10 337 {
Chris@10 338 E T2S, T4j, T48, T2t;
Chris@10 339 T2S = FMA(KP618033988, T2R, T2Q);
Chris@10 340 T4j = FNMS(KP618033988, T2Q, T2R);
Chris@10 341 T48 = FMA(KP559016994, T2s, T2r);
Chris@10 342 T2t = FNMS(KP559016994, T2s, T2r);
Chris@10 343 {
Chris@10 344 E T4g, T2L, T2I, T4f, T4i, T2P;
Chris@10 345 T4g = FNMS(KP618033988, T2J, T2K);
Chris@10 346 T2L = FMA(KP618033988, T2K, T2J);
Chris@10 347 T4a = FMA(KP951056516, T49, T48);
Chris@10 348 T57 = FNMS(KP951056516, T49, T48);
Chris@10 349 T3v = FNMS(KP951056516, T2w, T2t);
Chris@10 350 T2x = FMA(KP951056516, T2w, T2t);
Chris@10 351 T4i = FMA(KP559016994, T2O, T2N);
Chris@10 352 T2P = FNMS(KP559016994, T2O, T2N);
Chris@10 353 T4k = FNMS(KP951056516, T4j, T4i);
Chris@10 354 T55 = FMA(KP951056516, T4j, T4i);
Chris@10 355 T3s = FMA(KP951056516, T2S, T2P);
Chris@10 356 T2T = FNMS(KP951056516, T2S, T2P);
Chris@10 357 T2I = FNMS(KP559016994, T2H, T2G);
Chris@10 358 T4f = FMA(KP559016994, T2H, T2G);
Chris@10 359 T2D = FNMS(KP618033988, T2C, T2B);
Chris@10 360 T4c = FMA(KP618033988, T2B, T2C);
Chris@10 361 T3t = FMA(KP951056516, T2L, T2I);
Chris@10 362 T2M = FNMS(KP951056516, T2L, T2I);
Chris@10 363 T4h = FNMS(KP951056516, T4g, T4f);
Chris@10 364 T54 = FMA(KP951056516, T4g, T4f);
Chris@10 365 }
Chris@10 366 }
Chris@10 367 }
Chris@10 368 }
Chris@10 369 }
/* All 25 inputs have been loaded by this point; the rest combines them and stores the outputs. */
Chris@10 370 {
Chris@10 371 E T4d, T58, T3w, T3H, T3r, T3k, T36, T38, T3o, T3q, T3j, T2Z, T37;
Chris@10 372 {
Chris@10 373 E T2E, T1s, T1u, TP, T1t;
Chris@10 374 {
Chris@10 375 E TM, TO, TN, T4b, T2A;
Chris@10 376 TM = Ts + TL;
Chris@10 377 TO = Ts - TL;
Chris@10 378 T4b = FMA(KP559016994, T2z, T2y);
Chris@10 379 T2A = FNMS(KP559016994, T2z, T2y);
Chris@10 380 TN = FNMS(KP250000000, TM, T9);
Chris@10 381 T4d = FMA(KP951056516, T4c, T4b);
Chris@10 382 T58 = FNMS(KP951056516, T4c, T4b);
Chris@10 383 T3w = FMA(KP951056516, T2D, T2A);
Chris@10 384 T2E = FNMS(KP951056516, T2D, T2A);
Chris@10 385 T1s = FMA(KP618033988, T1r, T18);
Chris@10 386 T1u = FNMS(KP618033988, T18, T1r);
Chris@10 387 ro[0] = T9 + TM;
Chris@10 388 TP = FMA(KP559016994, TO, TN);
Chris@10 389 T1t = FNMS(KP559016994, TO, TN);
Chris@10 390 }
Chris@10 391 {
Chris@10 392 E T1J, T1N, T1M, T1O, T1G, T1I, T1H;
Chris@10 393 T1G = T1E + T1F;
Chris@10 394 T1I = T1E - T1F;
/* Stores to ro[] at 5, 10, 15, 20; io[0] and io[] at 5, 10, 15, 20 follow below. */
Chris@10 395 ro[WS(os, 15)] = FMA(KP951056516, T1u, T1t);
Chris@10 396 ro[WS(os, 10)] = FNMS(KP951056516, T1u, T1t);
Chris@10 397 ro[WS(os, 5)] = FMA(KP951056516, T1s, TP);
Chris@10 398 ro[WS(os, 20)] = FNMS(KP951056516, T1s, TP);
Chris@10 399 T1H = FNMS(KP250000000, T1G, T1D);
Chris@10 400 io[0] = T1D + T1G;
Chris@10 401 T1J = FMA(KP559016994, T1I, T1H);
Chris@10 402 T1N = FNMS(KP559016994, T1I, T1H);
Chris@10 403 T1M = FMA(KP618033988, T1L, T1K);
Chris@10 404 T1O = FNMS(KP618033988, T1K, T1L);
Chris@10 405 {
Chris@10 406 E T1V, T3f, T3m, T3n, T2W, T2Y, T32, T3g, T3h, T35, T3i, T2X;
Chris@10 407 T3H = FMA(KP951056516, T1U, T1R);
Chris@10 408 T1V = FNMS(KP951056516, T1U, T1R);
Chris@10 409 T3f = FMA(KP951056516, T3e, T3b);
Chris@10 410 T3r = FNMS(KP951056516, T3e, T3b);
Chris@10 411 io[WS(os, 15)] = FNMS(KP951056516, T1O, T1N);
Chris@10 412 io[WS(os, 10)] = FMA(KP951056516, T1O, T1N);
Chris@10 413 io[WS(os, 20)] = FMA(KP951056516, T1M, T1J);
Chris@10 414 io[WS(os, 5)] = FNMS(KP951056516, T1M, T1J);
Chris@10 415 {
Chris@10 416 E T30, T2a, T2p, T31, T33, T2F, T2U, T34, T2q, T2V;
Chris@10 417 T30 = FMA(KP256756360, T22, T29);
Chris@10 418 T2a = FNMS(KP256756360, T29, T22);
Chris@10 419 T2p = FMA(KP634619297, T2o, T2h);
Chris@10 420 T31 = FNMS(KP634619297, T2h, T2o);
Chris@10 421 T33 = FMA(KP549754652, T2x, T2E);
Chris@10 422 T2F = FNMS(KP549754652, T2E, T2x);
Chris@10 423 T2U = FNMS(KP939062505, T2T, T2M);
Chris@10 424 T34 = FMA(KP939062505, T2M, T2T);
Chris@10 425 T3m = FNMS(KP871714437, T2p, T2a);
Chris@10 426 T2q = FMA(KP871714437, T2p, T2a);
Chris@10 427 T3n = FNMS(KP831864738, T2U, T2F);
Chris@10 428 T2V = FMA(KP831864738, T2U, T2F);
Chris@10 429 T2W = FMA(KP904730450, T2V, T2q);
Chris@10 430 T2Y = FNMS(KP904730450, T2V, T2q);
Chris@10 431 T32 = FNMS(KP871714437, T31, T30);
Chris@10 432 T3g = FMA(KP871714437, T31, T30);
Chris@10 433 T3h = FMA(KP831864738, T34, T33);
Chris@10 434 T35 = FNMS(KP831864738, T34, T33);
Chris@10 435 }
Chris@10 436 io[WS(os, 1)] = FMA(KP968583161, T2W, T1V);
Chris@10 437 T3i = FMA(KP904730450, T3h, T3g);
Chris@10 438 T3k = FNMS(KP904730450, T3h, T3g);
Chris@10 439 T36 = FMA(KP559154169, T35, T32);
Chris@10 440 T38 = FNMS(KP683113946, T32, T35);
Chris@10 441 ro[WS(os, 1)] = FMA(KP968583161, T3i, T3f);
Chris@10 442 T2X = FNMS(KP242145790, T2W, T1V);
Chris@10 443 T3o = FMA(KP559154169, T3n, T3m);
Chris@10 444 T3q = FNMS(KP683113946, T3m, T3n);
Chris@10 445 T3j = FNMS(KP242145790, T3i, T3f);
Chris@10 446 T2Z = FMA(KP541454447, T2Y, T2X);
Chris@10 447 T37 = FNMS(KP541454447, T2Y, T2X);
Chris@10 448 }
Chris@10 449 }
Chris@10 450 }
Chris@10 451 {
Chris@10 452 E T47, T4R, T5A, T5w, T5y, T5E, T5G, T5z, T5t, T5x;
Chris@10 453 {
Chris@10 454 E T53, T5j, T5u, T5v, T5i, T5D, T5m, T5p, T5C, T3p, T3l, T5s, T5q, T5r;
Chris@10 455 T47 = FMA(KP951056516, T46, T45);
Chris@10 456 T53 = FNMS(KP951056516, T46, T45);
Chris@10 457 T3p = FNMS(KP541454447, T3k, T3j);
Chris@10 458 T3l = FMA(KP541454447, T3k, T3j);
/* Stores to io[] and ro[] at 6, 11, 16, 21. */
Chris@10 459 io[WS(os, 16)] = FNMS(KP833417178, T38, T37);
Chris@10 460 io[WS(os, 11)] = FMA(KP833417178, T38, T37);
Chris@10 461 io[WS(os, 21)] = FMA(KP921177326, T36, T2Z);
Chris@10 462 io[WS(os, 6)] = FNMS(KP921177326, T36, T2Z);
Chris@10 463 ro[WS(os, 11)] = FNMS(KP833417178, T3q, T3p);
Chris@10 464 ro[WS(os, 16)] = FMA(KP833417178, T3q, T3p);
Chris@10 465 ro[WS(os, 21)] = FNMS(KP921177326, T3o, T3l);
Chris@10 466 ro[WS(os, 6)] = FMA(KP921177326, T3o, T3l);
Chris@10 467 T5j = FMA(KP951056516, T4Q, T4P);
Chris@10 468 T4R = FNMS(KP951056516, T4Q, T4P);
Chris@10 469 {
Chris@10 470 E T5k, T56, T59, T5l, T5n, T5d, T5g, T5o, T5a, T5h;
Chris@10 471 T5k = FNMS(KP062914667, T54, T55);
Chris@10 472 T56 = FMA(KP062914667, T55, T54);
Chris@10 473 T59 = FMA(KP634619297, T58, T57);
Chris@10 474 T5l = FNMS(KP634619297, T57, T58);
Chris@10 475 T5n = FNMS(KP470564281, T5b, T5c);
Chris@10 476 T5d = FMA(KP470564281, T5c, T5b);
Chris@10 477 T5g = FMA(KP549754652, T5f, T5e);
Chris@10 478 T5o = FNMS(KP549754652, T5e, T5f);
Chris@10 479 T5u = FNMS(KP845997307, T59, T56);
Chris@10 480 T5a = FMA(KP845997307, T59, T56);
Chris@10 481 T5v = FNMS(KP968479752, T5g, T5d);
Chris@10 482 T5h = FMA(KP968479752, T5g, T5d);
Chris@10 483 T5i = FMA(KP906616052, T5h, T5a);
Chris@10 484 T5A = FNMS(KP906616052, T5h, T5a);
Chris@10 485 T5D = FNMS(KP845997307, T5l, T5k);
Chris@10 486 T5m = FMA(KP845997307, T5l, T5k);
Chris@10 487 T5p = FMA(KP968479752, T5o, T5n);
Chris@10 488 T5C = FNMS(KP968479752, T5o, T5n);
Chris@10 489 }
Chris@10 490 ro[WS(os, 2)] = FMA(KP998026728, T5i, T53);
Chris@10 491 T5s = FMA(KP906616052, T5p, T5m);
Chris@10 492 T5q = FNMS(KP906616052, T5p, T5m);
Chris@10 493 T5w = FNMS(KP560319534, T5v, T5u);
Chris@10 494 T5y = FMA(KP681693190, T5u, T5v);
Chris@10 495 T5E = FNMS(KP681693190, T5D, T5C);
Chris@10 496 T5G = FMA(KP560319534, T5C, T5D);
Chris@10 497 T5r = FMA(KP249506682, T5q, T5j);
Chris@10 498 io[WS(os, 2)] = FNMS(KP998026728, T5q, T5j);
Chris@10 499 T5z = FNMS(KP249506682, T5i, T53);
Chris@10 500 T5t = FNMS(KP557913902, T5s, T5r);
Chris@10 501 T5x = FMA(KP557913902, T5s, T5r);
Chris@10 502 }
Chris@10 503 {
Chris@10 504 E T4W, T4M, T4O, T50, T52, T4V, T4F, T4N;
Chris@10 505 {
Chris@10 506 E T4Y, T4Z, T4C, T4E, T4I, T4T, T4S, T4L, T5F, T5B, T4U, T4D;
Chris@10 507 T5F = FMA(KP557913902, T5A, T5z);
Chris@10 508 T5B = FNMS(KP557913902, T5A, T5z);
/* Stores to io[] and ro[] at 7, 12, 17, 22. */
Chris@10 509 io[WS(os, 7)] = FMA(KP860541664, T5y, T5x);
Chris@10 510 io[WS(os, 22)] = FNMS(KP860541664, T5y, T5x);
Chris@10 511 io[WS(os, 17)] = FMA(KP949179823, T5w, T5t);
Chris@10 512 io[WS(os, 12)] = FNMS(KP949179823, T5w, T5t);
Chris@10 513 ro[WS(os, 12)] = FNMS(KP949179823, T5G, T5F);
Chris@10 514 ro[WS(os, 17)] = FMA(KP949179823, T5G, T5F);
Chris@10 515 ro[WS(os, 7)] = FNMS(KP860541664, T5E, T5B);
Chris@10 516 ro[WS(os, 22)] = FMA(KP860541664, T5E, T5B);
Chris@10 517 {
Chris@10 518 E T4J, T4e, T4l, T4K, T4G, T4t, T4A, T4H, T4m, T4B;
Chris@10 519 T4J = FNMS(KP062914667, T4a, T4d);
Chris@10 520 T4e = FMA(KP062914667, T4d, T4a);
Chris@10 521 T4l = FNMS(KP827271945, T4k, T4h);
Chris@10 522 T4K = FMA(KP827271945, T4h, T4k);
Chris@10 523 T4G = FNMS(KP126329378, T4p, T4s);
Chris@10 524 T4t = FMA(KP126329378, T4s, T4p);
Chris@10 525 T4A = FMA(KP939062505, T4z, T4w);
Chris@10 526 T4H = FNMS(KP939062505, T4w, T4z);
Chris@10 527 T4Y = FNMS(KP772036680, T4l, T4e);
Chris@10 528 T4m = FMA(KP772036680, T4l, T4e);
Chris@10 529 T4Z = FNMS(KP734762448, T4A, T4t);
Chris@10 530 T4B = FMA(KP734762448, T4A, T4t);
Chris@10 531 T4C = FMA(KP994076283, T4B, T4m);
Chris@10 532 T4E = FNMS(KP994076283, T4B, T4m);
Chris@10 533 T4I = FMA(KP734762448, T4H, T4G);
Chris@10 534 T4T = FNMS(KP734762448, T4H, T4G);
Chris@10 535 T4S = FMA(KP772036680, T4K, T4J);
Chris@10 536 T4L = FNMS(KP772036680, T4K, T4J);
Chris@10 537 }
Chris@10 538 ro[WS(os, 3)] = FMA(KP998026728, T4C, T47);
Chris@10 539 T4U = FMA(KP994076283, T4T, T4S);
Chris@10 540 T4W = FNMS(KP994076283, T4T, T4S);
Chris@10 541 T4M = FNMS(KP621716863, T4L, T4I);
Chris@10 542 T4O = FMA(KP614372930, T4I, T4L);
Chris@10 543 io[WS(os, 3)] = FNMS(KP998026728, T4U, T4R);
Chris@10 544 T4D = FNMS(KP249506682, T4C, T47);
Chris@10 545 T50 = FMA(KP614372930, T4Z, T4Y);
Chris@10 546 T52 = FNMS(KP621716863, T4Y, T4Z);
Chris@10 547 T4V = FMA(KP249506682, T4U, T4R);
Chris@10 548 T4F = FNMS(KP557913902, T4E, T4D);
Chris@10 549 T4N = FMA(KP557913902, T4E, T4D);
Chris@10 550 }
Chris@10 551 {
Chris@10 552 E T3S, T3T, T3G, T41, T3K, T3N, T40, T51, T4X, T3Q, T3O, T3P;
Chris@10 553 T51 = FMA(KP557913902, T4W, T4V);
Chris@10 554 T4X = FNMS(KP557913902, T4W, T4V);
/* Stores to ro[] and io[] at 8, 13, 18, 23. */
Chris@10 555 ro[WS(os, 18)] = FNMS(KP949179823, T4O, T4N);
Chris@10 556 ro[WS(os, 13)] = FMA(KP949179823, T4O, T4N);
Chris@10 557 ro[WS(os, 8)] = FMA(KP943557151, T4M, T4F);
Chris@10 558 ro[WS(os, 23)] = FNMS(KP943557151, T4M, T4F);
Chris@10 559 io[WS(os, 8)] = FMA(KP943557151, T52, T51);
Chris@10 560 io[WS(os, 23)] = FNMS(KP943557151, T52, T51);
Chris@10 561 io[WS(os, 18)] = FNMS(KP949179823, T50, T4X);
Chris@10 562 io[WS(os, 13)] = FMA(KP949179823, T50, T4X);
Chris@10 563 {
Chris@10 564 E T3I, T3u, T3x, T3J, T3L, T3B, T3E, T3M, T3y, T3F;
Chris@10 565 T3I = FMA(KP126329378, T3s, T3t);
Chris@10 566 T3u = FNMS(KP126329378, T3t, T3s);
Chris@10 567 T3x = FNMS(KP470564281, T3w, T3v);
Chris@10 568 T3J = FMA(KP470564281, T3v, T3w);
Chris@10 569 T3L = FNMS(KP634619297, T3z, T3A);
Chris@10 570 T3B = FMA(KP634619297, T3A, T3z);
Chris@10 571 T3E = FNMS(KP827271945, T3D, T3C);
Chris@10 572 T3M = FMA(KP827271945, T3C, T3D);
Chris@10 573 T3S = FMA(KP912018591, T3x, T3u);
Chris@10 574 T3y = FNMS(KP912018591, T3x, T3u);
Chris@10 575 T3T = FMA(KP912575812, T3E, T3B);
Chris@10 576 T3F = FNMS(KP912575812, T3E, T3B);
Chris@10 577 T3G = FNMS(KP851038619, T3F, T3y);
Chris@10 578 T3Y = FMA(KP851038619, T3F, T3y);
Chris@10 579 T41 = FNMS(KP912018591, T3J, T3I);
Chris@10 580 T3K = FMA(KP912018591, T3J, T3I);
Chris@10 581 T3N = FMA(KP912575812, T3M, T3L);
Chris@10 582 T40 = FNMS(KP912575812, T3M, T3L);
Chris@10 583 }
Chris@10 584 ro[WS(os, 4)] = FNMS(KP992114701, T3G, T3r);
Chris@10 585 T3Q = FNMS(KP851038619, T3N, T3K);
Chris@10 586 T3O = FMA(KP851038619, T3N, T3K);
Chris@10 587 T3U = FNMS(KP525970792, T3T, T3S);
Chris@10 588 T3W = FMA(KP726211448, T3S, T3T);
Chris@10 589 T42 = FNMS(KP726211448, T41, T40);
Chris@10 590 T44 = FMA(KP525970792, T40, T41);
Chris@10 591 T3P = FMA(KP248028675, T3O, T3H);
Chris@10 592 io[WS(os, 4)] = FNMS(KP992114701, T3O, T3H);
Chris@10 593 T3X = FMA(KP248028675, T3G, T3r);
Chris@10 594 T3R = FNMS(KP554608978, T3Q, T3P);
Chris@10 595 T3V = FMA(KP554608978, T3Q, T3P);
Chris@10 596 }
Chris@10 597 }
Chris@10 598 }
Chris@10 599 }
Chris@10 600 }
/* Final stores to io[] and ro[] at 9, 14, 19, 24, using the temporaries declared at the top of the loop body. */
Chris@10 601 T3Z = FMA(KP554608978, T3Y, T3X);
Chris@10 602 T43 = FNMS(KP554608978, T3Y, T3X);
Chris@10 603 io[WS(os, 9)] = FNMS(KP803003575, T3W, T3V);
Chris@10 604 io[WS(os, 24)] = FMA(KP803003575, T3W, T3V);
Chris@10 605 io[WS(os, 19)] = FNMS(KP943557151, T3U, T3R);
Chris@10 606 io[WS(os, 14)] = FMA(KP943557151, T3U, T3R);
Chris@10 607 ro[WS(os, 14)] = FNMS(KP943557151, T44, T43);
Chris@10 608 ro[WS(os, 19)] = FMA(KP943557151, T44, T43);
Chris@10 609 ro[WS(os, 24)] = FMA(KP803003575, T42, T3Z);
Chris@10 610 ro[WS(os, 9)] = FNMS(KP803003575, T42, T3Z);
Chris@10 611 }
Chris@10 612 }
Chris@10 613 }
Chris@10 614
/*
 * Descriptor passed to X(kdft_register) together with n1_25.  It carries the
 * number 25, the name "n1_25", and the counts {84, 0, 268, 0}; the first three
 * match the "84 additions, 0 multiplications, 268 fused multiply/add" figures
 * quoted in the header comment of this HAVE_FMA variant.  The meaning of
 * &GENUS and the trailing zeros is fixed by kdft_desc, which is declared
 * outside this file.
 */
Chris@10 615 static const kdft_desc desc = { 25, "n1_25", {84, 0, 268, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 616
/*
 * Entry point for this codelet: hands n1_25 and its descriptor to
 * X(kdft_register) for planner p.  X() is a macro defined outside this file
 * (presumably a symbol-name wrapper -- confirm against the project headers).
 */
Chris@10 617 void X(codelet_n1_25) (planner *p) {
Chris@10 618 X(kdft_register) (p, n1_25, &desc);
Chris@10 619 }
Chris@10 620
Chris@10 621 #else /* HAVE_FMA */
Chris@10 622
Chris@10 623 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 25 -name n1_25 -include n.h */
Chris@10 624
Chris@10 625 /*
Chris@10 626 * This function contains 352 FP additions, 184 FP multiplications,
Chris@10 627 * (or, 260 additions, 92 multiplications, 92 fused multiply/add),
Chris@10 628 * 101 stack variables, 20 constants, and 100 memory accesses
Chris@10 629 */
Chris@10 630 #include "n.h"
Chris@10 631
Chris@10 632 static void n1_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 633 {
Chris@10 634 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@10 635 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@10 636 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@10 637 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@10 638 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 639 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@10 640 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 641 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@10 642 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@10 643 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@10 644 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@10 645 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 646 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@10 647 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@10 648 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@10 649 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 650 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 651 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 652 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@10 653 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 654 {
Chris@10 655 INT i;
Chris@10 656 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(100, is), MAKE_VOLATILE_STRIDE(100, os)) {
Chris@10 657 E T9, T4u, T2T, TP, T3H, TW, T5y, T3I, T2Q, T4v, Ti, Tr, Ts, T5m, T5n;
Chris@10 658 E T5v, T18, T4G, T34, T3M, T1G, T4J, T38, T3T, T1v, T4K, T37, T3W, T1j, T4H;
Chris@10 659 E T35, T3P, TB, TK, TL, T5p, T5q, T5w, T1T, T4N, T3c, T41, T2r, T4Q, T3e;
Chris@10 660 E T4b, T2g, T4R, T3f, T48, T24, T4O, T3b, T44;
Chris@10 661 {
Chris@10 662 E T1, T4, T7, T8, T2S, T2R, TN, TO;
Chris@10 663 T1 = ri[0];
Chris@10 664 {
Chris@10 665 E T2, T3, T5, T6;
Chris@10 666 T2 = ri[WS(is, 5)];
Chris@10 667 T3 = ri[WS(is, 20)];
Chris@10 668 T4 = T2 + T3;
Chris@10 669 T5 = ri[WS(is, 10)];
Chris@10 670 T6 = ri[WS(is, 15)];
Chris@10 671 T7 = T5 + T6;
Chris@10 672 T8 = T4 + T7;
Chris@10 673 T2S = T5 - T6;
Chris@10 674 T2R = T2 - T3;
Chris@10 675 }
Chris@10 676 T9 = T1 + T8;
Chris@10 677 T4u = FNMS(KP587785252, T2R, KP951056516 * T2S);
Chris@10 678 T2T = FMA(KP951056516, T2R, KP587785252 * T2S);
Chris@10 679 TN = KP559016994 * (T4 - T7);
Chris@10 680 TO = FNMS(KP250000000, T8, T1);
Chris@10 681 TP = TN + TO;
Chris@10 682 T3H = TO - TN;
Chris@10 683 }
Chris@10 684 {
Chris@10 685 E T2N, T2K, T2L, TS, T2O, TV, T2M, T2P;
Chris@10 686 T2N = ii[0];
Chris@10 687 {
Chris@10 688 E TQ, TR, TT, TU;
Chris@10 689 TQ = ii[WS(is, 5)];
Chris@10 690 TR = ii[WS(is, 20)];
Chris@10 691 T2K = TQ + TR;
Chris@10 692 TT = ii[WS(is, 10)];
Chris@10 693 TU = ii[WS(is, 15)];
Chris@10 694 T2L = TT + TU;
Chris@10 695 TS = TQ - TR;
Chris@10 696 T2O = T2K + T2L;
Chris@10 697 TV = TT - TU;
Chris@10 698 }
Chris@10 699 TW = FMA(KP951056516, TS, KP587785252 * TV);
Chris@10 700 T5y = T2N + T2O;
Chris@10 701 T3I = FNMS(KP587785252, TS, KP951056516 * TV);
Chris@10 702 T2M = KP559016994 * (T2K - T2L);
Chris@10 703 T2P = FNMS(KP250000000, T2O, T2N);
Chris@10 704 T2Q = T2M + T2P;
Chris@10 705 T4v = T2P - T2M;
Chris@10 706 }
Chris@10 707 {
Chris@10 708 E Ta, T1c, Tj, T1z, Th, T1h, TY, T1g, T13, T1d, T16, T1b, Tq, T1E, T1l;
Chris@10 709 E T1D, T1q, T1A, T1t, T1y;
Chris@10 710 Ta = ri[WS(is, 1)];
Chris@10 711 T1c = ii[WS(is, 1)];
Chris@10 712 Tj = ri[WS(is, 4)];
Chris@10 713 T1z = ii[WS(is, 4)];
Chris@10 714 {
Chris@10 715 E Tb, Tc, Td, Te, Tf, Tg;
Chris@10 716 Tb = ri[WS(is, 6)];
Chris@10 717 Tc = ri[WS(is, 21)];
Chris@10 718 Td = Tb + Tc;
Chris@10 719 Te = ri[WS(is, 11)];
Chris@10 720 Tf = ri[WS(is, 16)];
Chris@10 721 Tg = Te + Tf;
Chris@10 722 Th = Td + Tg;
Chris@10 723 T1h = Te - Tf;
Chris@10 724 TY = KP559016994 * (Td - Tg);
Chris@10 725 T1g = Tb - Tc;
Chris@10 726 }
Chris@10 727 {
Chris@10 728 E T11, T12, T19, T14, T15, T1a;
Chris@10 729 T11 = ii[WS(is, 6)];
Chris@10 730 T12 = ii[WS(is, 21)];
Chris@10 731 T19 = T11 + T12;
Chris@10 732 T14 = ii[WS(is, 11)];
Chris@10 733 T15 = ii[WS(is, 16)];
Chris@10 734 T1a = T14 + T15;
Chris@10 735 T13 = T11 - T12;
Chris@10 736 T1d = T19 + T1a;
Chris@10 737 T16 = T14 - T15;
Chris@10 738 T1b = KP559016994 * (T19 - T1a);
Chris@10 739 }
Chris@10 740 {
Chris@10 741 E Tk, Tl, Tm, Tn, To, Tp;
Chris@10 742 Tk = ri[WS(is, 9)];
Chris@10 743 Tl = ri[WS(is, 24)];
Chris@10 744 Tm = Tk + Tl;
Chris@10 745 Tn = ri[WS(is, 14)];
Chris@10 746 To = ri[WS(is, 19)];
Chris@10 747 Tp = Tn + To;
Chris@10 748 Tq = Tm + Tp;
Chris@10 749 T1E = Tn - To;
Chris@10 750 T1l = KP559016994 * (Tm - Tp);
Chris@10 751 T1D = Tk - Tl;
Chris@10 752 }
Chris@10 753 {
Chris@10 754 E T1o, T1p, T1w, T1r, T1s, T1x;
Chris@10 755 T1o = ii[WS(is, 9)];
Chris@10 756 T1p = ii[WS(is, 24)];
Chris@10 757 T1w = T1o + T1p;
Chris@10 758 T1r = ii[WS(is, 14)];
Chris@10 759 T1s = ii[WS(is, 19)];
Chris@10 760 T1x = T1r + T1s;
Chris@10 761 T1q = T1o - T1p;
Chris@10 762 T1A = T1w + T1x;
Chris@10 763 T1t = T1r - T1s;
Chris@10 764 T1y = KP559016994 * (T1w - T1x);
Chris@10 765 }
Chris@10 766 Ti = Ta + Th;
Chris@10 767 Tr = Tj + Tq;
Chris@10 768 Ts = Ti + Tr;
Chris@10 769 T5m = T1c + T1d;
Chris@10 770 T5n = T1z + T1A;
Chris@10 771 T5v = T5m + T5n;
Chris@10 772 {
Chris@10 773 E T17, T3L, T10, T3K, TZ;
Chris@10 774 T17 = FMA(KP951056516, T13, KP587785252 * T16);
Chris@10 775 T3L = FNMS(KP587785252, T13, KP951056516 * T16);
Chris@10 776 TZ = FNMS(KP250000000, Th, Ta);
Chris@10 777 T10 = TY + TZ;
Chris@10 778 T3K = TZ - TY;
Chris@10 779 T18 = T10 + T17;
Chris@10 780 T4G = T3K + T3L;
Chris@10 781 T34 = T10 - T17;
Chris@10 782 T3M = T3K - T3L;
Chris@10 783 }
Chris@10 784 {
Chris@10 785 E T1F, T3R, T1C, T3S, T1B;
Chris@10 786 T1F = FMA(KP951056516, T1D, KP587785252 * T1E);
Chris@10 787 T3R = FNMS(KP587785252, T1D, KP951056516 * T1E);
Chris@10 788 T1B = FNMS(KP250000000, T1A, T1z);
Chris@10 789 T1C = T1y + T1B;
Chris@10 790 T3S = T1B - T1y;
Chris@10 791 T1G = T1C - T1F;
Chris@10 792 T4J = T3S - T3R;
Chris@10 793 T38 = T1F + T1C;
Chris@10 794 T3T = T3R + T3S;
Chris@10 795 }
Chris@10 796 {
Chris@10 797 E T1u, T3V, T1n, T3U, T1m;
Chris@10 798 T1u = FMA(KP951056516, T1q, KP587785252 * T1t);
Chris@10 799 T3V = FNMS(KP587785252, T1q, KP951056516 * T1t);
Chris@10 800 T1m = FNMS(KP250000000, Tq, Tj);
Chris@10 801 T1n = T1l + T1m;
Chris@10 802 T3U = T1m - T1l;
Chris@10 803 T1v = T1n + T1u;
Chris@10 804 T4K = T3U + T3V;
Chris@10 805 T37 = T1n - T1u;
Chris@10 806 T3W = T3U - T3V;
Chris@10 807 }
Chris@10 808 {
Chris@10 809 E T1i, T3N, T1f, T3O, T1e;
Chris@10 810 T1i = FMA(KP951056516, T1g, KP587785252 * T1h);
Chris@10 811 T3N = FNMS(KP587785252, T1g, KP951056516 * T1h);
Chris@10 812 T1e = FNMS(KP250000000, T1d, T1c);
Chris@10 813 T1f = T1b + T1e;
Chris@10 814 T3O = T1e - T1b;
Chris@10 815 T1j = T1f - T1i;
Chris@10 816 T4H = T3O - T3N;
Chris@10 817 T35 = T1i + T1f;
Chris@10 818 T3P = T3N + T3O;
Chris@10 819 }
Chris@10 820 }
Chris@10 821 {
Chris@10 822 E Tt, T1X, TC, T2k, TA, T22, T1J, T21, T1O, T1Y, T1R, T1W, TJ, T2p, T26;
Chris@10 823 E T2o, T2b, T2l, T2e, T2j;
Chris@10 824 Tt = ri[WS(is, 2)];
Chris@10 825 T1X = ii[WS(is, 2)];
Chris@10 826 TC = ri[WS(is, 3)];
Chris@10 827 T2k = ii[WS(is, 3)];
Chris@10 828 {
Chris@10 829 E Tu, Tv, Tw, Tx, Ty, Tz;
Chris@10 830 Tu = ri[WS(is, 7)];
Chris@10 831 Tv = ri[WS(is, 22)];
Chris@10 832 Tw = Tu + Tv;
Chris@10 833 Tx = ri[WS(is, 12)];
Chris@10 834 Ty = ri[WS(is, 17)];
Chris@10 835 Tz = Tx + Ty;
Chris@10 836 TA = Tw + Tz;
Chris@10 837 T22 = Tx - Ty;
Chris@10 838 T1J = KP559016994 * (Tw - Tz);
Chris@10 839 T21 = Tu - Tv;
Chris@10 840 }
Chris@10 841 {
Chris@10 842 E T1M, T1N, T1U, T1P, T1Q, T1V;
Chris@10 843 T1M = ii[WS(is, 7)];
Chris@10 844 T1N = ii[WS(is, 22)];
Chris@10 845 T1U = T1M + T1N;
Chris@10 846 T1P = ii[WS(is, 12)];
Chris@10 847 T1Q = ii[WS(is, 17)];
Chris@10 848 T1V = T1P + T1Q;
Chris@10 849 T1O = T1M - T1N;
Chris@10 850 T1Y = T1U + T1V;
Chris@10 851 T1R = T1P - T1Q;
Chris@10 852 T1W = KP559016994 * (T1U - T1V);
Chris@10 853 }
Chris@10 854 {
Chris@10 855 E TD, TE, TF, TG, TH, TI;
Chris@10 856 TD = ri[WS(is, 8)];
Chris@10 857 TE = ri[WS(is, 23)];
Chris@10 858 TF = TD + TE;
Chris@10 859 TG = ri[WS(is, 13)];
Chris@10 860 TH = ri[WS(is, 18)];
Chris@10 861 TI = TG + TH;
Chris@10 862 TJ = TF + TI;
Chris@10 863 T2p = TG - TH;
Chris@10 864 T26 = KP559016994 * (TF - TI);
Chris@10 865 T2o = TD - TE;
Chris@10 866 }
Chris@10 867 {
Chris@10 868 E T29, T2a, T2h, T2c, T2d, T2i;
Chris@10 869 T29 = ii[WS(is, 8)];
Chris@10 870 T2a = ii[WS(is, 23)];
Chris@10 871 T2h = T29 + T2a;
Chris@10 872 T2c = ii[WS(is, 13)];
Chris@10 873 T2d = ii[WS(is, 18)];
Chris@10 874 T2i = T2c + T2d;
Chris@10 875 T2b = T29 - T2a;
Chris@10 876 T2l = T2h + T2i;
Chris@10 877 T2e = T2c - T2d;
Chris@10 878 T2j = KP559016994 * (T2h - T2i);
Chris@10 879 }
Chris@10 880 TB = Tt + TA;
Chris@10 881 TK = TC + TJ;
Chris@10 882 TL = TB + TK;
Chris@10 883 T5p = T1X + T1Y;
Chris@10 884 T5q = T2k + T2l;
Chris@10 885 T5w = T5p + T5q;
Chris@10 886 {
Chris@10 887 E T1S, T40, T1L, T3Z, T1K;
Chris@10 888 T1S = FMA(KP951056516, T1O, KP587785252 * T1R);
Chris@10 889 T40 = FNMS(KP587785252, T1O, KP951056516 * T1R);
Chris@10 890 T1K = FNMS(KP250000000, TA, Tt);
Chris@10 891 T1L = T1J + T1K;
Chris@10 892 T3Z = T1K - T1J;
Chris@10 893 T1T = T1L + T1S;
Chris@10 894 T4N = T3Z + T40;
Chris@10 895 T3c = T1L - T1S;
Chris@10 896 T41 = T3Z - T40;
Chris@10 897 }
Chris@10 898 {
Chris@10 899 E T2q, T49, T2n, T4a, T2m;
Chris@10 900 T2q = FMA(KP951056516, T2o, KP587785252 * T2p);
Chris@10 901 T49 = FNMS(KP587785252, T2o, KP951056516 * T2p);
Chris@10 902 T2m = FNMS(KP250000000, T2l, T2k);
Chris@10 903 T2n = T2j + T2m;
Chris@10 904 T4a = T2m - T2j;
Chris@10 905 T2r = T2n - T2q;
Chris@10 906 T4Q = T4a - T49;
Chris@10 907 T3e = T2q + T2n;
Chris@10 908 T4b = T49 + T4a;
Chris@10 909 }
Chris@10 910 {
Chris@10 911 E T2f, T47, T28, T46, T27;
Chris@10 912 T2f = FMA(KP951056516, T2b, KP587785252 * T2e);
Chris@10 913 T47 = FNMS(KP587785252, T2b, KP951056516 * T2e);
Chris@10 914 T27 = FNMS(KP250000000, TJ, TC);
Chris@10 915 T28 = T26 + T27;
Chris@10 916 T46 = T27 - T26;
Chris@10 917 T2g = T28 + T2f;
Chris@10 918 T4R = T46 + T47;
Chris@10 919 T3f = T28 - T2f;
Chris@10 920 T48 = T46 - T47;
Chris@10 921 }
Chris@10 922 {
Chris@10 923 E T23, T42, T20, T43, T1Z;
Chris@10 924 T23 = FMA(KP951056516, T21, KP587785252 * T22);
Chris@10 925 T42 = FNMS(KP587785252, T21, KP951056516 * T22);
Chris@10 926 T1Z = FNMS(KP250000000, T1Y, T1X);
Chris@10 927 T20 = T1W + T1Z;
Chris@10 928 T43 = T1Z - T1W;
Chris@10 929 T24 = T20 - T23;
Chris@10 930 T4O = T43 - T42;
Chris@10 931 T3b = T23 + T20;
Chris@10 932 T44 = T42 + T43;
Chris@10 933 }
Chris@10 934 }
Chris@10 935 {
Chris@10 936 E T5j, TM, T5k, T5s, T5u, T5o, T5r, T5t, T5l;
Chris@10 937 T5j = KP559016994 * (Ts - TL);
Chris@10 938 TM = Ts + TL;
Chris@10 939 T5k = FNMS(KP250000000, TM, T9);
Chris@10 940 T5o = T5m - T5n;
Chris@10 941 T5r = T5p - T5q;
Chris@10 942 T5s = FMA(KP951056516, T5o, KP587785252 * T5r);
Chris@10 943 T5u = FNMS(KP587785252, T5o, KP951056516 * T5r);
Chris@10 944 ro[0] = T9 + TM;
Chris@10 945 T5t = T5k - T5j;
Chris@10 946 ro[WS(os, 10)] = T5t - T5u;
Chris@10 947 ro[WS(os, 15)] = T5t + T5u;
Chris@10 948 T5l = T5j + T5k;
Chris@10 949 ro[WS(os, 20)] = T5l - T5s;
Chris@10 950 ro[WS(os, 5)] = T5l + T5s;
Chris@10 951 }
Chris@10 952 {
Chris@10 953 E T5x, T5z, T5A, T5E, T5F, T5C, T5D, T5G, T5B;
Chris@10 954 T5x = KP559016994 * (T5v - T5w);
Chris@10 955 T5z = T5v + T5w;
Chris@10 956 T5A = FNMS(KP250000000, T5z, T5y);
Chris@10 957 T5C = Ti - Tr;
Chris@10 958 T5D = TB - TK;
Chris@10 959 T5E = FMA(KP951056516, T5C, KP587785252 * T5D);
Chris@10 960 T5F = FNMS(KP587785252, T5C, KP951056516 * T5D);
Chris@10 961 io[0] = T5y + T5z;
Chris@10 962 T5G = T5A - T5x;
Chris@10 963 io[WS(os, 10)] = T5F + T5G;
Chris@10 964 io[WS(os, 15)] = T5G - T5F;
Chris@10 965 T5B = T5x + T5A;
Chris@10 966 io[WS(os, 5)] = T5B - T5E;
Chris@10 967 io[WS(os, 20)] = T5E + T5B;
Chris@10 968 }
Chris@10 969 {
Chris@10 970 E TX, T2U, T2u, T2Z, T2v, T2Y, T2A, T2V, T2D, T2J;
Chris@10 971 TX = TP + TW;
Chris@10 972 T2U = T2Q - T2T;
Chris@10 973 {
Chris@10 974 E T1k, T1H, T1I, T25, T2s, T2t;
Chris@10 975 T1k = FMA(KP968583161, T18, KP248689887 * T1j);
Chris@10 976 T1H = FMA(KP535826794, T1v, KP844327925 * T1G);
Chris@10 977 T1I = T1k + T1H;
Chris@10 978 T25 = FMA(KP876306680, T1T, KP481753674 * T24);
Chris@10 979 T2s = FMA(KP728968627, T2g, KP684547105 * T2r);
Chris@10 980 T2t = T25 + T2s;
Chris@10 981 T2u = T1I + T2t;
Chris@10 982 T2Z = T25 - T2s;
Chris@10 983 T2v = KP559016994 * (T1I - T2t);
Chris@10 984 T2Y = T1k - T1H;
Chris@10 985 }
Chris@10 986 {
Chris@10 987 E T2y, T2z, T2H, T2B, T2C, T2I;
Chris@10 988 T2y = FNMS(KP248689887, T18, KP968583161 * T1j);
Chris@10 989 T2z = FNMS(KP844327925, T1v, KP535826794 * T1G);
Chris@10 990 T2H = T2y + T2z;
Chris@10 991 T2B = FNMS(KP481753674, T1T, KP876306680 * T24);
Chris@10 992 T2C = FNMS(KP684547105, T2g, KP728968627 * T2r);
Chris@10 993 T2I = T2B + T2C;
Chris@10 994 T2A = T2y - T2z;
Chris@10 995 T2V = T2H + T2I;
Chris@10 996 T2D = T2B - T2C;
Chris@10 997 T2J = KP559016994 * (T2H - T2I);
Chris@10 998 }
Chris@10 999 ro[WS(os, 1)] = TX + T2u;
Chris@10 1000 io[WS(os, 1)] = T2U + T2V;
Chris@10 1001 {
Chris@10 1002 E T2E, T2G, T2x, T2F, T2w;
Chris@10 1003 T2E = FMA(KP951056516, T2A, KP587785252 * T2D);
Chris@10 1004 T2G = FNMS(KP587785252, T2A, KP951056516 * T2D);
Chris@10 1005 T2w = FNMS(KP250000000, T2u, TX);
Chris@10 1006 T2x = T2v + T2w;
Chris@10 1007 T2F = T2w - T2v;
Chris@10 1008 ro[WS(os, 21)] = T2x - T2E;
Chris@10 1009 ro[WS(os, 16)] = T2F + T2G;
Chris@10 1010 ro[WS(os, 6)] = T2x + T2E;
Chris@10 1011 ro[WS(os, 11)] = T2F - T2G;
Chris@10 1012 }
Chris@10 1013 {
Chris@10 1014 E T30, T31, T2X, T32, T2W;
Chris@10 1015 T30 = FMA(KP951056516, T2Y, KP587785252 * T2Z);
Chris@10 1016 T31 = FNMS(KP587785252, T2Y, KP951056516 * T2Z);
Chris@10 1017 T2W = FNMS(KP250000000, T2V, T2U);
Chris@10 1018 T2X = T2J + T2W;
Chris@10 1019 T32 = T2W - T2J;
Chris@10 1020 io[WS(os, 6)] = T2X - T30;
Chris@10 1021 io[WS(os, 16)] = T32 - T31;
Chris@10 1022 io[WS(os, 21)] = T30 + T2X;
Chris@10 1023 io[WS(os, 11)] = T31 + T32;
Chris@10 1024 }
Chris@10 1025 }
Chris@10 1026 {
Chris@10 1027 E T4F, T52, T4U, T5b, T56, T57, T51, T5f, T53, T5e;
Chris@10 1028 T4F = T3H + T3I;
Chris@10 1029 T52 = T4v - T4u;
Chris@10 1030 {
Chris@10 1031 E T4I, T4L, T4M, T4P, T4S, T4T;
Chris@10 1032 T4I = FMA(KP728968627, T4G, KP684547105 * T4H);
Chris@10 1033 T4L = FNMS(KP992114701, T4K, KP125333233 * T4J);
Chris@10 1034 T4M = T4I + T4L;
Chris@10 1035 T4P = FMA(KP062790519, T4N, KP998026728 * T4O);
Chris@10 1036 T4S = FNMS(KP637423989, T4R, KP770513242 * T4Q);
Chris@10 1037 T4T = T4P + T4S;
Chris@10 1038 T4U = T4M + T4T;
Chris@10 1039 T5b = KP559016994 * (T4M - T4T);
Chris@10 1040 T56 = T4I - T4L;
Chris@10 1041 T57 = T4P - T4S;
Chris@10 1042 }
Chris@10 1043 {
Chris@10 1044 E T4V, T4W, T4X, T4Y, T4Z, T50;
Chris@10 1045 T4V = FNMS(KP684547105, T4G, KP728968627 * T4H);
Chris@10 1046 T4W = FMA(KP125333233, T4K, KP992114701 * T4J);
Chris@10 1047 T4X = T4V - T4W;
Chris@10 1048 T4Y = FNMS(KP998026728, T4N, KP062790519 * T4O);
Chris@10 1049 T4Z = FMA(KP770513242, T4R, KP637423989 * T4Q);
Chris@10 1050 T50 = T4Y - T4Z;
Chris@10 1051 T51 = KP559016994 * (T4X - T50);
Chris@10 1052 T5f = T4Y + T4Z;
Chris@10 1053 T53 = T4X + T50;
Chris@10 1054 T5e = T4V + T4W;
Chris@10 1055 }
Chris@10 1056 ro[WS(os, 3)] = T4F + T4U;
Chris@10 1057 io[WS(os, 3)] = T52 + T53;
Chris@10 1058 {
Chris@10 1059 E T58, T59, T55, T5a, T54;
Chris@10 1060 T58 = FMA(KP951056516, T56, KP587785252 * T57);
Chris@10 1061 T59 = FNMS(KP587785252, T56, KP951056516 * T57);
Chris@10 1062 T54 = FNMS(KP250000000, T53, T52);
Chris@10 1063 T55 = T51 + T54;
Chris@10 1064 T5a = T54 - T51;
Chris@10 1065 io[WS(os, 8)] = T55 - T58;
Chris@10 1066 io[WS(os, 18)] = T5a - T59;
Chris@10 1067 io[WS(os, 23)] = T58 + T55;
Chris@10 1068 io[WS(os, 13)] = T59 + T5a;
Chris@10 1069 }
Chris@10 1070 {
Chris@10 1071 E T5g, T5i, T5d, T5h, T5c;
Chris@10 1072 T5g = FMA(KP951056516, T5e, KP587785252 * T5f);
Chris@10 1073 T5i = FNMS(KP587785252, T5e, KP951056516 * T5f);
Chris@10 1074 T5c = FNMS(KP250000000, T4U, T4F);
Chris@10 1075 T5d = T5b + T5c;
Chris@10 1076 T5h = T5c - T5b;
Chris@10 1077 ro[WS(os, 23)] = T5d - T5g;
Chris@10 1078 ro[WS(os, 18)] = T5h + T5i;
Chris@10 1079 ro[WS(os, 8)] = T5d + T5g;
Chris@10 1080 ro[WS(os, 13)] = T5h - T5i;
Chris@10 1081 }
Chris@10 1082 }
Chris@10 1083 {
Chris@10 1084 E T3J, T4w, T4e, T4B, T4f, T4A, T4k, T4x, T4n, T4t;
Chris@10 1085 T3J = T3H - T3I;
Chris@10 1086 T4w = T4u + T4v;
Chris@10 1087 {
Chris@10 1088 E T3Q, T3X, T3Y, T45, T4c, T4d;
Chris@10 1089 T3Q = FMA(KP876306680, T3M, KP481753674 * T3P);
Chris@10 1090 T3X = FNMS(KP425779291, T3W, KP904827052 * T3T);
Chris@10 1091 T3Y = T3Q + T3X;
Chris@10 1092 T45 = FMA(KP535826794, T41, KP844327925 * T44);
Chris@10 1093 T4c = FMA(KP062790519, T48, KP998026728 * T4b);
Chris@10 1094 T4d = T45 + T4c;
Chris@10 1095 T4e = T3Y + T4d;
Chris@10 1096 T4B = T45 - T4c;
Chris@10 1097 T4f = KP559016994 * (T3Y - T4d);
Chris@10 1098 T4A = T3Q - T3X;
Chris@10 1099 }
Chris@10 1100 {
Chris@10 1101 E T4i, T4j, T4r, T4l, T4m, T4s;
Chris@10 1102 T4i = FNMS(KP481753674, T3M, KP876306680 * T3P);
Chris@10 1103 T4j = FMA(KP904827052, T3W, KP425779291 * T3T);
Chris@10 1104 T4r = T4i - T4j;
Chris@10 1105 T4l = FNMS(KP844327925, T41, KP535826794 * T44);
Chris@10 1106 T4m = FNMS(KP998026728, T48, KP062790519 * T4b);
Chris@10 1107 T4s = T4l + T4m;
Chris@10 1108 T4k = T4i + T4j;
Chris@10 1109 T4x = T4r + T4s;
Chris@10 1110 T4n = T4l - T4m;
Chris@10 1111 T4t = KP559016994 * (T4r - T4s);
Chris@10 1112 }
Chris@10 1113 ro[WS(os, 2)] = T3J + T4e;
Chris@10 1114 io[WS(os, 2)] = T4w + T4x;
Chris@10 1115 {
Chris@10 1116 E T4o, T4q, T4h, T4p, T4g;
Chris@10 1117 T4o = FMA(KP951056516, T4k, KP587785252 * T4n);
Chris@10 1118 T4q = FNMS(KP587785252, T4k, KP951056516 * T4n);
Chris@10 1119 T4g = FNMS(KP250000000, T4e, T3J);
Chris@10 1120 T4h = T4f + T4g;
Chris@10 1121 T4p = T4g - T4f;
Chris@10 1122 ro[WS(os, 22)] = T4h - T4o;
Chris@10 1123 ro[WS(os, 17)] = T4p + T4q;
Chris@10 1124 ro[WS(os, 7)] = T4h + T4o;
Chris@10 1125 ro[WS(os, 12)] = T4p - T4q;
Chris@10 1126 }
Chris@10 1127 {
Chris@10 1128 E T4C, T4D, T4z, T4E, T4y;
Chris@10 1129 T4C = FMA(KP951056516, T4A, KP587785252 * T4B);
Chris@10 1130 T4D = FNMS(KP587785252, T4A, KP951056516 * T4B);
Chris@10 1131 T4y = FNMS(KP250000000, T4x, T4w);
Chris@10 1132 T4z = T4t + T4y;
Chris@10 1133 T4E = T4y - T4t;
Chris@10 1134 io[WS(os, 7)] = T4z - T4C;
Chris@10 1135 io[WS(os, 17)] = T4E - T4D;
Chris@10 1136 io[WS(os, 22)] = T4C + T4z;
Chris@10 1137 io[WS(os, 12)] = T4D + T4E;
Chris@10 1138 }
Chris@10 1139 }
Chris@10 1140 {
Chris@10 1141 E T33, T3j, T3i, T3z, T3r, T3s, T3q, T3D, T3v, T3C;
Chris@10 1142 T33 = TP - TW;
Chris@10 1143 T3j = T2T + T2Q;
Chris@10 1144 {
Chris@10 1145 E T36, T39, T3a, T3d, T3g, T3h;
Chris@10 1146 T36 = FMA(KP535826794, T34, KP844327925 * T35);
Chris@10 1147 T39 = FMA(KP637423989, T37, KP770513242 * T38);
Chris@10 1148 T3a = T36 - T39;
Chris@10 1149 T3d = FNMS(KP425779291, T3c, KP904827052 * T3b);
Chris@10 1150 T3g = FNMS(KP992114701, T3f, KP125333233 * T3e);
Chris@10 1151 T3h = T3d + T3g;
Chris@10 1152 T3i = T3a + T3h;
Chris@10 1153 T3z = KP559016994 * (T3a - T3h);
Chris@10 1154 T3r = T3d - T3g;
Chris@10 1155 T3s = T36 + T39;
Chris@10 1156 }
Chris@10 1157 {
Chris@10 1158 E T3k, T3l, T3m, T3n, T3o, T3p;
Chris@10 1159 T3k = FNMS(KP844327925, T34, KP535826794 * T35);
Chris@10 1160 T3l = FNMS(KP637423989, T38, KP770513242 * T37);
Chris@10 1161 T3m = T3k + T3l;
Chris@10 1162 T3n = FMA(KP904827052, T3c, KP425779291 * T3b);
Chris@10 1163 T3o = FMA(KP125333233, T3f, KP992114701 * T3e);
Chris@10 1164 T3p = T3n + T3o;
Chris@10 1165 T3q = T3m - T3p;
Chris@10 1166 T3D = T3o - T3n;
Chris@10 1167 T3v = KP559016994 * (T3m + T3p);
Chris@10 1168 T3C = T3k - T3l;
Chris@10 1169 }
Chris@10 1170 ro[WS(os, 4)] = T33 + T3i;
Chris@10 1171 io[WS(os, 4)] = T3j + T3q;
Chris@10 1172 {
Chris@10 1173 E T3t, T3y, T3w, T3x, T3u;
Chris@10 1174 T3t = FNMS(KP587785252, T3s, KP951056516 * T3r);
Chris@10 1175 T3y = FMA(KP951056516, T3s, KP587785252 * T3r);
Chris@10 1176 T3u = FNMS(KP250000000, T3q, T3j);
Chris@10 1177 T3w = T3u - T3v;
Chris@10 1178 T3x = T3u + T3v;
Chris@10 1179 io[WS(os, 14)] = T3t + T3w;
Chris@10 1180 io[WS(os, 24)] = T3y + T3x;
Chris@10 1181 io[WS(os, 19)] = T3w - T3t;
Chris@10 1182 io[WS(os, 9)] = T3x - T3y;
Chris@10 1183 }
Chris@10 1184 {
Chris@10 1185 E T3E, T3G, T3B, T3F, T3A;
Chris@10 1186 T3E = FMA(KP951056516, T3C, KP587785252 * T3D);
Chris@10 1187 T3G = FNMS(KP587785252, T3C, KP951056516 * T3D);
Chris@10 1188 T3A = FNMS(KP250000000, T3i, T33);
Chris@10 1189 T3B = T3z + T3A;
Chris@10 1190 T3F = T3A - T3z;
Chris@10 1191 ro[WS(os, 24)] = T3B - T3E;
Chris@10 1192 ro[WS(os, 19)] = T3F + T3G;
Chris@10 1193 ro[WS(os, 9)] = T3B + T3E;
Chris@10 1194 ro[WS(os, 14)] = T3F - T3G;
Chris@10 1195 }
Chris@10 1196 }
Chris@10 1197 }
Chris@10 1198 }
Chris@10 1199 }
Chris@10 1200 /* File-local descriptor handed to X(kdft_register) together with n1_25; 25 and "n1_25" match the generator's -n and -name options. */
Chris@10 1201 static const kdft_desc desc = { 25, "n1_25", {260, 92, 92, 0}, &GENUS, 0, 0, 0, 0 }; /* NOTE(review): {260, 92, 92, 0} is presumably the add/mul/fma/other operation-count record, and GENUS and the trailing zeros come from headers not visible here -- confirm against kdft_desc in codelet-dft.h */
Chris@10 1202 /* Registration entry point: passes the n1_25 kernel and its descriptor to the planner p through X(kdft_register); returns nothing. */
Chris@10 1203 void X(codelet_n1_25) (planner *p) {
Chris@10 1204 X(kdft_register) (p, n1_25, &desc); /* X() is a macro from the project headers; presumably it applies the precision-specific symbol prefix -- TODO confirm */
Chris@10 1205 }
Chris@10 1206
Chris@10 1207 #endif /* HAVE_FMA */