annotate src/fftw-3.3.8/dft/scalar/codelets/n1_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:12 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 25 -name n1_25 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 352 FP additions, 268 FP multiplications,
Chris@82 32 * (or, 84 additions, 0 multiplications, 268 fused multiply/add),
Chris@82 33 * 128 stack variables, 47 constants, and 100 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_25 -- generated size-25 codelet (gen_notw, -fma variant).
 *
 * Each pass of the loop reads 25 elements from ri and 25 from ii, at
 * index 0 and at WS(is, 1..24), and stores 25 elements to ro and 25 to
 * io, at index 0 and at WS(os, 1..24).
 *
 * Parameters:
 *   ri, ii   - input arrays (only read here); presumably the real and
 *              imaginary parts of the input -- confirm against
 *              dft/codelet-dft.h
 *   ro, io   - output arrays; presumably real and imaginary parts
 *   is, os   - input and output strides, always applied through WS()
 *   v        - number of loop iterations to perform
 *   ivs, ovs - added to the input / output pointers after each iteration
 *
 * Nothing is returned; results are stored through ro and io only.
 */
Chris@82 37 static void n1_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants used by the arithmetic below, declared through DK(). */
Chris@82 39 DK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 40 DK(KP554608978, +0.554608978404018097464974850792216217022558774);
Chris@82 41 DK(KP248028675, +0.248028675328619457762448260696444630363259177);
Chris@82 42 DK(KP726211448, +0.726211448929902658173535992263577167607493062);
Chris@82 43 DK(KP525970792, +0.525970792408939708442463226536226366643874659);
Chris@82 44 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 45 DK(KP851038619, +0.851038619207379630836264138867114231259902550);
Chris@82 46 DK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 47 DK(KP912018591, +0.912018591466481957908415381764119056233607330);
Chris@82 48 DK(KP943557151, +0.943557151597354104399655195398983005179443399);
Chris@82 49 DK(KP614372930, +0.614372930789563808870829930444362096004872855);
Chris@82 50 DK(KP621716863, +0.621716863012209892444754556304102309693593202);
Chris@82 51 DK(KP994076283, +0.994076283785401014123185814696322018529298887);
Chris@82 52 DK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 53 DK(KP126329378, +0.126329378446108174786050455341811215027378105);
Chris@82 54 DK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 55 DK(KP827271945, +0.827271945972475634034355757144307982555673741);
Chris@82 56 DK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 57 DK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 58 DK(KP557913902, +0.557913902031834264187699648465567037992437152);
Chris@82 59 DK(KP249506682, +0.249506682107067890488084201715862638334226305);
Chris@82 60 DK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 61 DK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 62 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 63 DK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 64 DK(KP968479752, +0.968479752739016373193524836781420152702090879);
Chris@82 65 DK(KP470564281, +0.470564281212251493087595091036643380879947982);
Chris@82 66 DK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 67 DK(KP062914667, +0.062914667253649757225485955897349402364686947);
Chris@82 68 DK(KP833417178, +0.833417178328688677408962550243238843138996060);
Chris@82 69 DK(KP921177326, +0.921177326965143320250447435415066029359282231);
Chris@82 70 DK(KP541454447, +0.541454447536312777046285590082819509052033189);
Chris@82 71 DK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 72 DK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 73 DK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 74 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 75 DK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 76 DK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 77 DK(KP939062505, +0.939062505817492352556001843133229685779824606);
Chris@82 78 DK(KP549754652, +0.549754652192770074288023275540779861653779767);
Chris@82 79 DK(KP871714437, +0.871714437527667770979999223229522602943903653);
Chris@82 80 DK(KP634619297, +0.634619297544148100711287640319130485732531031);
Chris@82 81 DK(KP256756360, +0.256756360367726783319498520922669048172391148);
Chris@82 82 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 83 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 84 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 85 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 86 {
Chris@82 87 INT i;
/* i counts down from v; after every pass the input pointers move by ivs and the output pointers by ovs. */
Chris@82 88 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(100, is), MAKE_VOLATILE_STRIDE(100, os)) {
/* Temporaries that carry values from the input stages below to the output stages. */
Chris@82 89 E T9, T4Q, T1U, T3b, T45, T1D, T46, T3e, T1R, T4P, Ti, Tr, Ts, TY, T17;
Chris@82 90 E T1E, T22, T5f, T3z, T4z, T2o, T5b, T3C, T4s, T2h, T5c, T3D, T4p, T29, T5e;
Chris@82 91 E T3A, T4w, TB, TK, TL, T1h, T1q, T1F, T2x, T57, T3v, T4a, T2T, T55, T3s;
Chris@82 92 E T4k, T2M, T54, T3t, T4h, T2E, T58, T3w, T4d;
/* Combine ri elements 0, 5, 10, 15 and 20. */
Chris@82 93 {
Chris@82 94 E T1, T4, T7, T8, T1T, T1S, T39, T3a;
Chris@82 95 T1 = ri[0];
Chris@82 96 {
Chris@82 97 E T2, T3, T5, T6;
Chris@82 98 T2 = ri[WS(is, 5)];
Chris@82 99 T3 = ri[WS(is, 20)];
Chris@82 100 T4 = T2 + T3;
Chris@82 101 T5 = ri[WS(is, 10)];
Chris@82 102 T6 = ri[WS(is, 15)];
Chris@82 103 T7 = T5 + T6;
Chris@82 104 T8 = T4 + T7;
Chris@82 105 T1T = T5 - T6;
Chris@82 106 T1S = T2 - T3;
Chris@82 107 }
Chris@82 108 T9 = T1 + T8;
Chris@82 109 T4Q = FNMS(KP618033988, T1S, T1T);
Chris@82 110 T1U = FMA(KP618033988, T1T, T1S);
Chris@82 111 T39 = FNMS(KP250000000, T8, T1);
Chris@82 112 T3a = T4 - T7;
Chris@82 113 T3b = FMA(KP559016994, T3a, T39);
Chris@82 114 T45 = FNMS(KP559016994, T3a, T39);
Chris@82 115 }
/* Combine ii elements 0, 5, 10, 15 and 20. */
Chris@82 116 {
Chris@82 117 E T1v, T1y, T1B, T1C, T3d, T3c, T1P, T1Q;
Chris@82 118 T1v = ii[0];
Chris@82 119 {
Chris@82 120 E T1w, T1x, T1z, T1A;
Chris@82 121 T1w = ii[WS(is, 5)];
Chris@82 122 T1x = ii[WS(is, 20)];
Chris@82 123 T1y = T1w + T1x;
Chris@82 124 T1z = ii[WS(is, 10)];
Chris@82 125 T1A = ii[WS(is, 15)];
Chris@82 126 T1B = T1z + T1A;
Chris@82 127 T1C = T1y + T1B;
Chris@82 128 T3d = T1z - T1A;
Chris@82 129 T3c = T1w - T1x;
Chris@82 130 }
Chris@82 131 T1D = T1v + T1C;
Chris@82 132 T46 = FNMS(KP618033988, T3c, T3d);
Chris@82 133 T3e = FMA(KP618033988, T3d, T3c);
Chris@82 134 T1P = FNMS(KP250000000, T1C, T1v);
Chris@82 135 T1Q = T1y - T1B;
Chris@82 136 T1R = FMA(KP559016994, T1Q, T1P);
Chris@82 137 T4P = FNMS(KP559016994, T1Q, T1P);
Chris@82 138 }
/* Combine ri and ii elements 1, 6, 11, 16, 21 and 4, 9, 14, 19, 24. */
Chris@82 139 {
Chris@82 140 E Ta, TQ, Tj, TZ, Th, T24, T1Z, T20, TX, T27, T1X, T26, Tq, T2m, T2c;
Chris@82 141 E T2l, T16, T2j, T2e, T2f;
Chris@82 142 Ta = ri[WS(is, 1)];
Chris@82 143 TQ = ii[WS(is, 1)];
Chris@82 144 Tj = ri[WS(is, 4)];
Chris@82 145 TZ = ii[WS(is, 4)];
Chris@82 146 {
Chris@82 147 E Tb, Tc, Td, Te, Tf, Tg;
Chris@82 148 Tb = ri[WS(is, 6)];
Chris@82 149 Tc = ri[WS(is, 21)];
Chris@82 150 Td = Tb + Tc;
Chris@82 151 Te = ri[WS(is, 11)];
Chris@82 152 Tf = ri[WS(is, 16)];
Chris@82 153 Tg = Te + Tf;
Chris@82 154 Th = Td + Tg;
Chris@82 155 T24 = Td - Tg;
Chris@82 156 T1Z = Tc - Tb;
Chris@82 157 T20 = Tf - Te;
Chris@82 158 }
Chris@82 159 {
Chris@82 160 E TR, TS, TT, TU, TV, TW;
Chris@82 161 TR = ii[WS(is, 6)];
Chris@82 162 TS = ii[WS(is, 21)];
Chris@82 163 TT = TR + TS;
Chris@82 164 TU = ii[WS(is, 11)];
Chris@82 165 TV = ii[WS(is, 16)];
Chris@82 166 TW = TU + TV;
Chris@82 167 TX = TT + TW;
Chris@82 168 T27 = TV - TU;
Chris@82 169 T1X = TT - TW;
Chris@82 170 T26 = TR - TS;
Chris@82 171 }
Chris@82 172 {
Chris@82 173 E Tk, Tl, Tm, Tn, To, Tp;
Chris@82 174 Tk = ri[WS(is, 9)];
Chris@82 175 Tl = ri[WS(is, 24)];
Chris@82 176 Tm = Tk + Tl;
Chris@82 177 Tn = ri[WS(is, 14)];
Chris@82 178 To = ri[WS(is, 19)];
Chris@82 179 Tp = Tn + To;
Chris@82 180 Tq = Tm + Tp;
Chris@82 181 T2m = To - Tn;
Chris@82 182 T2c = Tm - Tp;
Chris@82 183 T2l = Tl - Tk;
Chris@82 184 }
Chris@82 185 {
Chris@82 186 E T10, T11, T12, T13, T14, T15;
Chris@82 187 T10 = ii[WS(is, 9)];
Chris@82 188 T11 = ii[WS(is, 24)];
Chris@82 189 T12 = T10 + T11;
Chris@82 190 T13 = ii[WS(is, 14)];
Chris@82 191 T14 = ii[WS(is, 19)];
Chris@82 192 T15 = T13 + T14;
Chris@82 193 T16 = T12 + T15;
Chris@82 194 T2j = T15 - T12;
Chris@82 195 T2e = T11 - T10;
Chris@82 196 T2f = T14 - T13;
Chris@82 197 }
Chris@82 198 Ti = Ta + Th;
Chris@82 199 Tr = Tj + Tq;
Chris@82 200 Ts = Ti + Tr;
Chris@82 201 TY = TQ + TX;
Chris@82 202 T17 = TZ + T16;
Chris@82 203 T1E = TY + T17;
Chris@82 204 {
Chris@82 205 E T21, T4y, T1Y, T4x, T1W;
Chris@82 206 T21 = FMA(KP618033988, T20, T1Z);
Chris@82 207 T4y = FNMS(KP618033988, T1Z, T20);
Chris@82 208 T1W = FNMS(KP250000000, TX, TQ);
Chris@82 209 T1Y = FMA(KP559016994, T1X, T1W);
Chris@82 210 T4x = FNMS(KP559016994, T1X, T1W);
Chris@82 211 T22 = FMA(KP951056516, T21, T1Y);
Chris@82 212 T5f = FNMS(KP951056516, T4y, T4x);
Chris@82 213 T3z = FNMS(KP951056516, T21, T1Y);
Chris@82 214 T4z = FMA(KP951056516, T4y, T4x);
Chris@82 215 }
Chris@82 216 {
Chris@82 217 E T2n, T4r, T2k, T4q, T2i;
Chris@82 218 T2n = FMA(KP618033988, T2m, T2l);
Chris@82 219 T4r = FNMS(KP618033988, T2l, T2m);
Chris@82 220 T2i = FNMS(KP250000000, T16, TZ);
Chris@82 221 T2k = FNMS(KP559016994, T2j, T2i);
Chris@82 222 T4q = FMA(KP559016994, T2j, T2i);
Chris@82 223 T2o = FMA(KP951056516, T2n, T2k);
Chris@82 224 T5b = FNMS(KP951056516, T4r, T4q);
Chris@82 225 T3C = FNMS(KP951056516, T2n, T2k);
Chris@82 226 T4s = FMA(KP951056516, T4r, T4q);
Chris@82 227 }
Chris@82 228 {
Chris@82 229 E T2g, T4o, T2d, T4n, T2b;
Chris@82 230 T2g = FMA(KP618033988, T2f, T2e);
Chris@82 231 T4o = FNMS(KP618033988, T2e, T2f);
/* NOTE(review): this is the only FMS in the function; the sibling stages use FNMS here. Left exactly as generated. */
Chris@82 232 T2b = FMS(KP250000000, Tq, Tj);
Chris@82 233 T2d = FNMS(KP559016994, T2c, T2b);
Chris@82 234 T4n = FMA(KP559016994, T2c, T2b);
Chris@82 235 T2h = FMA(KP951056516, T2g, T2d);
Chris@82 236 T5c = FNMS(KP951056516, T4o, T4n);
Chris@82 237 T3D = FNMS(KP951056516, T2g, T2d);
Chris@82 238 T4p = FMA(KP951056516, T4o, T4n);
Chris@82 239 }
Chris@82 240 {
Chris@82 241 E T28, T4v, T25, T4u, T23;
Chris@82 242 T28 = FNMS(KP618033988, T27, T26);
Chris@82 243 T4v = FMA(KP618033988, T26, T27);
Chris@82 244 T23 = FNMS(KP250000000, Th, Ta);
Chris@82 245 T25 = FMA(KP559016994, T24, T23);
Chris@82 246 T4u = FNMS(KP559016994, T24, T23);
Chris@82 247 T29 = FMA(KP951056516, T28, T25);
Chris@82 248 T5e = FMA(KP951056516, T4v, T4u);
Chris@82 249 T3A = FNMS(KP951056516, T28, T25);
Chris@82 250 T4w = FNMS(KP951056516, T4v, T4u);
Chris@82 251 }
Chris@82 252 }
/* Combine ri and ii elements 2, 7, 12, 17, 22 and 3, 8, 13, 18, 23. */
Chris@82 253 {
Chris@82 254 E Tt, T19, TC, T1i, TA, T2z, T2u, T2v, T1g, T2C, T2s, T2B, TJ, T2O, T2J;
Chris@82 255 E T2K, T1p, T2R, T2H, T2Q;
Chris@82 256 Tt = ri[WS(is, 2)];
Chris@82 257 T19 = ii[WS(is, 2)];
Chris@82 258 TC = ri[WS(is, 3)];
Chris@82 259 T1i = ii[WS(is, 3)];
Chris@82 260 {
Chris@82 261 E Tu, Tv, Tw, Tx, Ty, Tz;
Chris@82 262 Tu = ri[WS(is, 7)];
Chris@82 263 Tv = ri[WS(is, 22)];
Chris@82 264 Tw = Tu + Tv;
Chris@82 265 Tx = ri[WS(is, 12)];
Chris@82 266 Ty = ri[WS(is, 17)];
Chris@82 267 Tz = Tx + Ty;
Chris@82 268 TA = Tw + Tz;
Chris@82 269 T2z = Tz - Tw;
Chris@82 270 T2u = Tv - Tu;
Chris@82 271 T2v = Ty - Tx;
Chris@82 272 }
Chris@82 273 {
Chris@82 274 E T1a, T1b, T1c, T1d, T1e, T1f;
Chris@82 275 T1a = ii[WS(is, 7)];
Chris@82 276 T1b = ii[WS(is, 22)];
Chris@82 277 T1c = T1a + T1b;
Chris@82 278 T1d = ii[WS(is, 12)];
Chris@82 279 T1e = ii[WS(is, 17)];
Chris@82 280 T1f = T1d + T1e;
Chris@82 281 T1g = T1c + T1f;
Chris@82 282 T2C = T1d - T1e;
Chris@82 283 T2s = T1f - T1c;
Chris@82 284 T2B = T1b - T1a;
Chris@82 285 }
Chris@82 286 {
Chris@82 287 E TD, TE, TF, TG, TH, TI;
Chris@82 288 TD = ri[WS(is, 8)];
Chris@82 289 TE = ri[WS(is, 23)];
Chris@82 290 TF = TD + TE;
Chris@82 291 TG = ri[WS(is, 13)];
Chris@82 292 TH = ri[WS(is, 18)];
Chris@82 293 TI = TG + TH;
Chris@82 294 TJ = TF + TI;
Chris@82 295 T2O = TI - TF;
Chris@82 296 T2J = TD - TE;
Chris@82 297 T2K = TG - TH;
Chris@82 298 }
Chris@82 299 {
Chris@82 300 E T1j, T1k, T1l, T1m, T1n, T1o;
Chris@82 301 T1j = ii[WS(is, 8)];
Chris@82 302 T1k = ii[WS(is, 23)];
Chris@82 303 T1l = T1j + T1k;
Chris@82 304 T1m = ii[WS(is, 13)];
Chris@82 305 T1n = ii[WS(is, 18)];
Chris@82 306 T1o = T1m + T1n;
Chris@82 307 T1p = T1l + T1o;
Chris@82 308 T2R = T1n - T1m;
Chris@82 309 T2H = T1o - T1l;
Chris@82 310 T2Q = T1k - T1j;
Chris@82 311 }
Chris@82 312 TB = Tt + TA;
Chris@82 313 TK = TC + TJ;
Chris@82 314 TL = TB + TK;
Chris@82 315 T1h = T19 + T1g;
Chris@82 316 T1q = T1i + T1p;
Chris@82 317 T1F = T1h + T1q;
Chris@82 318 {
Chris@82 319 E T2w, T49, T2t, T48, T2r;
Chris@82 320 T2w = FMA(KP618033988, T2v, T2u);
Chris@82 321 T49 = FNMS(KP618033988, T2u, T2v);
Chris@82 322 T2r = FNMS(KP250000000, T1g, T19);
Chris@82 323 T2t = FNMS(KP559016994, T2s, T2r);
Chris@82 324 T48 = FMA(KP559016994, T2s, T2r);
Chris@82 325 T2x = FMA(KP951056516, T2w, T2t);
Chris@82 326 T57 = FNMS(KP951056516, T49, T48);
Chris@82 327 T3v = FNMS(KP951056516, T2w, T2t);
Chris@82 328 T4a = FMA(KP951056516, T49, T48);
Chris@82 329 }
Chris@82 330 {
Chris@82 331 E T2S, T4j, T2P, T4i, T2N;
Chris@82 332 T2S = FMA(KP618033988, T2R, T2Q);
Chris@82 333 T4j = FNMS(KP618033988, T2Q, T2R);
Chris@82 334 T2N = FNMS(KP250000000, TJ, TC);
Chris@82 335 T2P = FNMS(KP559016994, T2O, T2N);
Chris@82 336 T4i = FMA(KP559016994, T2O, T2N);
Chris@82 337 T2T = FNMS(KP951056516, T2S, T2P);
Chris@82 338 T55 = FMA(KP951056516, T4j, T4i);
Chris@82 339 T3s = FMA(KP951056516, T2S, T2P);
Chris@82 340 T4k = FNMS(KP951056516, T4j, T4i);
Chris@82 341 }
Chris@82 342 {
Chris@82 343 E T2L, T4g, T2I, T4f, T2G;
Chris@82 344 T2L = FMA(KP618033988, T2K, T2J);
Chris@82 345 T4g = FNMS(KP618033988, T2J, T2K);
Chris@82 346 T2G = FNMS(KP250000000, T1p, T1i);
Chris@82 347 T2I = FNMS(KP559016994, T2H, T2G);
Chris@82 348 T4f = FMA(KP559016994, T2H, T2G);
Chris@82 349 T2M = FNMS(KP951056516, T2L, T2I);
Chris@82 350 T54 = FMA(KP951056516, T4g, T4f);
Chris@82 351 T3t = FMA(KP951056516, T2L, T2I);
Chris@82 352 T4h = FNMS(KP951056516, T4g, T4f);
Chris@82 353 }
Chris@82 354 {
Chris@82 355 E T2D, T4c, T2A, T4b, T2y;
Chris@82 356 T2D = FNMS(KP618033988, T2C, T2B);
Chris@82 357 T4c = FMA(KP618033988, T2B, T2C);
Chris@82 358 T2y = FNMS(KP250000000, TA, Tt);
Chris@82 359 T2A = FNMS(KP559016994, T2z, T2y);
Chris@82 360 T4b = FMA(KP559016994, T2z, T2y);
Chris@82 361 T2E = FNMS(KP951056516, T2D, T2A);
Chris@82 362 T58 = FNMS(KP951056516, T4c, T4b);
Chris@82 363 T3w = FMA(KP951056516, T2D, T2A);
Chris@82 364 T4d = FMA(KP951056516, T4c, T4b);
Chris@82 365 }
Chris@82 366 }
/* Store ro elements 0, 5, 10, 15 and 20. */
Chris@82 367 {
Chris@82 368 E TO, TM, TN, T1s, T1u, T18, T1r, T1t, TP;
Chris@82 369 TO = Ts - TL;
Chris@82 370 TM = Ts + TL;
Chris@82 371 TN = FNMS(KP250000000, TM, T9);
Chris@82 372 T18 = TY - T17;
Chris@82 373 T1r = T1h - T1q;
Chris@82 374 T1s = FMA(KP618033988, T1r, T18);
Chris@82 375 T1u = FNMS(KP618033988, T18, T1r);
Chris@82 376 ro[0] = T9 + TM;
Chris@82 377 T1t = FNMS(KP559016994, TO, TN);
Chris@82 378 ro[WS(os, 10)] = FNMS(KP951056516, T1u, T1t);
Chris@82 379 ro[WS(os, 15)] = FMA(KP951056516, T1u, T1t);
Chris@82 380 TP = FMA(KP559016994, TO, TN);
Chris@82 381 ro[WS(os, 20)] = FNMS(KP951056516, T1s, TP);
Chris@82 382 ro[WS(os, 5)] = FMA(KP951056516, T1s, TP);
Chris@82 383 }
/* Store io elements 0, 5, 10, 15 and 20. */
Chris@82 384 {
Chris@82 385 E T1I, T1G, T1H, T1M, T1O, T1K, T1L, T1N, T1J;
Chris@82 386 T1I = T1E - T1F;
Chris@82 387 T1G = T1E + T1F;
Chris@82 388 T1H = FNMS(KP250000000, T1G, T1D);
Chris@82 389 T1K = Ti - Tr;
Chris@82 390 T1L = TB - TK;
Chris@82 391 T1M = FMA(KP618033988, T1L, T1K);
Chris@82 392 T1O = FNMS(KP618033988, T1K, T1L);
Chris@82 393 io[0] = T1D + T1G;
Chris@82 394 T1N = FNMS(KP559016994, T1I, T1H);
Chris@82 395 io[WS(os, 10)] = FMA(KP951056516, T1O, T1N);
Chris@82 396 io[WS(os, 15)] = FNMS(KP951056516, T1O, T1N);
Chris@82 397 T1J = FMA(KP559016994, T1I, T1H);
Chris@82 398 io[WS(os, 5)] = FNMS(KP951056516, T1M, T1J);
Chris@82 399 io[WS(os, 20)] = FMA(KP951056516, T1M, T1J);
Chris@82 400 }
/* Store ro and io elements 1, 6, 11, 16 and 21. */
Chris@82 401 {
Chris@82 402 E T1V, T3f, T2W, T3n, T2Y, T3m, T32, T3k, T35, T3i;
Chris@82 403 T1V = FNMS(KP951056516, T1U, T1R);
Chris@82 404 T3f = FMA(KP951056516, T3e, T3b);
Chris@82 405 {
Chris@82 406 E T2a, T2p, T2q, T2F, T2U, T2V;
Chris@82 407 T2a = FNMS(KP256756360, T29, T22);
Chris@82 408 T2p = FMA(KP634619297, T2o, T2h);
Chris@82 409 T2q = FMA(KP871714437, T2p, T2a);
Chris@82 410 T2F = FNMS(KP549754652, T2E, T2x);
Chris@82 411 T2U = FNMS(KP939062505, T2T, T2M);
Chris@82 412 T2V = FMA(KP831864738, T2U, T2F);
Chris@82 413 T2W = FMA(KP904730450, T2V, T2q);
Chris@82 414 T3n = FNMS(KP831864738, T2U, T2F);
Chris@82 415 T2Y = FNMS(KP904730450, T2V, T2q);
Chris@82 416 T3m = FNMS(KP871714437, T2p, T2a);
Chris@82 417 }
Chris@82 418 {
Chris@82 419 E T30, T31, T3g, T33, T34, T3h;
Chris@82 420 T30 = FMA(KP256756360, T22, T29);
Chris@82 421 T31 = FNMS(KP634619297, T2h, T2o);
Chris@82 422 T3g = FMA(KP871714437, T31, T30);
Chris@82 423 T33 = FMA(KP549754652, T2x, T2E);
Chris@82 424 T34 = FMA(KP939062505, T2M, T2T);
Chris@82 425 T3h = FMA(KP831864738, T34, T33);
Chris@82 426 T32 = FNMS(KP871714437, T31, T30);
Chris@82 427 T3k = FNMS(KP904730450, T3h, T3g);
Chris@82 428 T35 = FNMS(KP831864738, T34, T33);
Chris@82 429 T3i = FMA(KP904730450, T3h, T3g);
Chris@82 430 }
Chris@82 431 io[WS(os, 1)] = FMA(KP968583161, T2W, T1V);
Chris@82 432 ro[WS(os, 1)] = FMA(KP968583161, T3i, T3f);
Chris@82 433 {
Chris@82 434 E T36, T38, T2Z, T37, T2X;
Chris@82 435 T36 = FMA(KP559154169, T35, T32);
Chris@82 436 T38 = FNMS(KP683113946, T32, T35);
Chris@82 437 T2X = FNMS(KP242145790, T2W, T1V);
Chris@82 438 T2Z = FMA(KP541454447, T2Y, T2X);
Chris@82 439 T37 = FNMS(KP541454447, T2Y, T2X);
Chris@82 440 io[WS(os, 6)] = FNMS(KP921177326, T36, T2Z);
Chris@82 441 io[WS(os, 11)] = FMA(KP833417178, T38, T37);
Chris@82 442 io[WS(os, 21)] = FMA(KP921177326, T36, T2Z);
Chris@82 443 io[WS(os, 16)] = FNMS(KP833417178, T38, T37);
Chris@82 444 }
Chris@82 445 {
Chris@82 446 E T3o, T3q, T3l, T3p, T3j;
Chris@82 447 T3o = FMA(KP559154169, T3n, T3m);
Chris@82 448 T3q = FNMS(KP683113946, T3m, T3n);
Chris@82 449 T3j = FNMS(KP242145790, T3i, T3f);
Chris@82 450 T3l = FMA(KP541454447, T3k, T3j);
Chris@82 451 T3p = FNMS(KP541454447, T3k, T3j);
Chris@82 452 ro[WS(os, 6)] = FMA(KP921177326, T3o, T3l);
Chris@82 453 ro[WS(os, 16)] = FMA(KP833417178, T3q, T3p);
Chris@82 454 ro[WS(os, 21)] = FNMS(KP921177326, T3o, T3l);
Chris@82 455 ro[WS(os, 11)] = FNMS(KP833417178, T3q, T3p);
Chris@82 456 }
Chris@82 457 }
/* Store ro and io elements 2, 7, 12, 17 and 22. */
Chris@82 458 {
Chris@82 459 E T53, T5j, T5i, T5A, T5u, T5v, T5q, T5D, T5s, T5C;
Chris@82 460 T53 = FNMS(KP951056516, T46, T45);
Chris@82 461 T5j = FMA(KP951056516, T4Q, T4P);
Chris@82 462 {
Chris@82 463 E T56, T59, T5a, T5d, T5g, T5h;
Chris@82 464 T56 = FMA(KP062914667, T55, T54);
Chris@82 465 T59 = FMA(KP634619297, T58, T57);
Chris@82 466 T5a = FMA(KP845997307, T59, T56);
Chris@82 467 T5d = FMA(KP470564281, T5c, T5b);
Chris@82 468 T5g = FMA(KP549754652, T5f, T5e);
Chris@82 469 T5h = FMA(KP968479752, T5g, T5d);
Chris@82 470 T5i = FMA(KP906616052, T5h, T5a);
Chris@82 471 T5A = FNMS(KP906616052, T5h, T5a);
Chris@82 472 T5u = FNMS(KP845997307, T59, T56);
Chris@82 473 T5v = FNMS(KP968479752, T5g, T5d);
Chris@82 474 }
Chris@82 475 {
Chris@82 476 E T5k, T5l, T5m, T5n, T5o, T5p;
Chris@82 477 T5k = FNMS(KP062914667, T54, T55);
Chris@82 478 T5l = FNMS(KP634619297, T57, T58);
Chris@82 479 T5m = FMA(KP845997307, T5l, T5k);
Chris@82 480 T5n = FNMS(KP470564281, T5b, T5c);
Chris@82 481 T5o = FNMS(KP549754652, T5e, T5f);
Chris@82 482 T5p = FMA(KP968479752, T5o, T5n);
Chris@82 483 T5q = FNMS(KP906616052, T5p, T5m);
Chris@82 484 T5D = FNMS(KP845997307, T5l, T5k);
Chris@82 485 T5s = FMA(KP906616052, T5p, T5m);
Chris@82 486 T5C = FNMS(KP968479752, T5o, T5n);
Chris@82 487 }
Chris@82 488 ro[WS(os, 2)] = FMA(KP998026728, T5i, T53);
Chris@82 489 io[WS(os, 2)] = FNMS(KP998026728, T5q, T5j);
Chris@82 490 {
Chris@82 491 E T5w, T5y, T5t, T5x, T5r;
Chris@82 492 T5w = FNMS(KP560319534, T5v, T5u);
Chris@82 493 T5y = FMA(KP681693190, T5u, T5v);
Chris@82 494 T5r = FMA(KP249506682, T5q, T5j);
Chris@82 495 T5t = FNMS(KP557913902, T5s, T5r);
Chris@82 496 T5x = FMA(KP557913902, T5s, T5r);
Chris@82 497 io[WS(os, 12)] = FNMS(KP949179823, T5w, T5t);
Chris@82 498 io[WS(os, 22)] = FNMS(KP860541664, T5y, T5x);
Chris@82 499 io[WS(os, 17)] = FMA(KP949179823, T5w, T5t);
Chris@82 500 io[WS(os, 7)] = FMA(KP860541664, T5y, T5x);
Chris@82 501 }
Chris@82 502 {
Chris@82 503 E T5E, T5G, T5B, T5F, T5z;
Chris@82 504 T5E = FNMS(KP681693190, T5D, T5C);
Chris@82 505 T5G = FMA(KP560319534, T5C, T5D);
Chris@82 506 T5z = FNMS(KP249506682, T5i, T53);
Chris@82 507 T5B = FNMS(KP557913902, T5A, T5z);
Chris@82 508 T5F = FMA(KP557913902, T5A, T5z);
Chris@82 509 ro[WS(os, 22)] = FMA(KP860541664, T5E, T5B);
Chris@82 510 ro[WS(os, 17)] = FMA(KP949179823, T5G, T5F);
Chris@82 511 ro[WS(os, 7)] = FNMS(KP860541664, T5E, T5B);
Chris@82 512 ro[WS(os, 12)] = FNMS(KP949179823, T5G, T5F);
Chris@82 513 }
Chris@82 514 }
/* Store ro and io elements 3, 8, 13, 18 and 23. */
Chris@82 515 {
Chris@82 516 E T47, T4R, T4C, T4Z, T4E, T4Y, T4I, T4W, T4L, T4U;
Chris@82 517 T47 = FMA(KP951056516, T46, T45);
Chris@82 518 T4R = FNMS(KP951056516, T4Q, T4P);
Chris@82 519 {
Chris@82 520 E T4e, T4l, T4m, T4t, T4A, T4B;
Chris@82 521 T4e = FMA(KP062914667, T4d, T4a);
Chris@82 522 T4l = FNMS(KP827271945, T4k, T4h);
Chris@82 523 T4m = FMA(KP772036680, T4l, T4e);
Chris@82 524 T4t = FMA(KP126329378, T4s, T4p);
Chris@82 525 T4A = FMA(KP939062505, T4z, T4w);
Chris@82 526 T4B = FMA(KP734762448, T4A, T4t);
Chris@82 527 T4C = FMA(KP994076283, T4B, T4m);
Chris@82 528 T4Z = FNMS(KP734762448, T4A, T4t);
Chris@82 529 T4E = FNMS(KP994076283, T4B, T4m);
Chris@82 530 T4Y = FNMS(KP772036680, T4l, T4e);
Chris@82 531 }
Chris@82 532 {
Chris@82 533 E T4G, T4H, T4T, T4J, T4K, T4S;
Chris@82 534 T4G = FNMS(KP126329378, T4p, T4s);
Chris@82 535 T4H = FNMS(KP939062505, T4w, T4z);
Chris@82 536 T4T = FNMS(KP734762448, T4H, T4G);
Chris@82 537 T4J = FNMS(KP062914667, T4a, T4d);
Chris@82 538 T4K = FMA(KP827271945, T4h, T4k);
Chris@82 539 T4S = FMA(KP772036680, T4K, T4J);
Chris@82 540 T4I = FMA(KP734762448, T4H, T4G);
Chris@82 541 T4W = FNMS(KP994076283, T4T, T4S);
Chris@82 542 T4L = FNMS(KP772036680, T4K, T4J);
Chris@82 543 T4U = FMA(KP994076283, T4T, T4S);
Chris@82 544 }
Chris@82 545 ro[WS(os, 3)] = FMA(KP998026728, T4C, T47);
Chris@82 546 io[WS(os, 3)] = FNMS(KP998026728, T4U, T4R);
Chris@82 547 {
Chris@82 548 E T4M, T4O, T4F, T4N, T4D;
Chris@82 549 T4M = FNMS(KP621716863, T4L, T4I);
Chris@82 550 T4O = FMA(KP614372930, T4I, T4L);
Chris@82 551 T4D = FNMS(KP249506682, T4C, T47);
Chris@82 552 T4F = FNMS(KP557913902, T4E, T4D);
Chris@82 553 T4N = FMA(KP557913902, T4E, T4D);
Chris@82 554 ro[WS(os, 23)] = FNMS(KP943557151, T4M, T4F);
Chris@82 555 ro[WS(os, 13)] = FMA(KP949179823, T4O, T4N);
Chris@82 556 ro[WS(os, 8)] = FMA(KP943557151, T4M, T4F);
Chris@82 557 ro[WS(os, 18)] = FNMS(KP949179823, T4O, T4N);
Chris@82 558 }
Chris@82 559 {
Chris@82 560 E T50, T52, T4X, T51, T4V;
Chris@82 561 T50 = FMA(KP614372930, T4Z, T4Y);
Chris@82 562 T52 = FNMS(KP621716863, T4Y, T4Z);
Chris@82 563 T4V = FMA(KP249506682, T4U, T4R);
Chris@82 564 T4X = FNMS(KP557913902, T4W, T4V);
Chris@82 565 T51 = FMA(KP557913902, T4W, T4V);
Chris@82 566 io[WS(os, 13)] = FMA(KP949179823, T50, T4X);
Chris@82 567 io[WS(os, 23)] = FNMS(KP943557151, T52, T51);
Chris@82 568 io[WS(os, 18)] = FNMS(KP949179823, T50, T4X);
Chris@82 569 io[WS(os, 8)] = FMA(KP943557151, T52, T51);
Chris@82 570 }
Chris@82 571 }
/* Store ro and io elements 4, 9, 14, 19 and 24. */
Chris@82 572 {
Chris@82 573 E T3r, T3H, T3G, T3Y, T3S, T3T, T3O, T41, T3Q, T40;
Chris@82 574 T3r = FNMS(KP951056516, T3e, T3b);
Chris@82 575 T3H = FMA(KP951056516, T1U, T1R);
Chris@82 576 {
Chris@82 577 E T3u, T3x, T3y, T3B, T3E, T3F;
Chris@82 578 T3u = FNMS(KP126329378, T3t, T3s);
Chris@82 579 T3x = FNMS(KP470564281, T3w, T3v);
Chris@82 580 T3y = FNMS(KP912018591, T3x, T3u);
Chris@82 581 T3B = FMA(KP634619297, T3A, T3z);
Chris@82 582 T3E = FNMS(KP827271945, T3D, T3C);
Chris@82 583 T3F = FNMS(KP912575812, T3E, T3B);
Chris@82 584 T3G = FNMS(KP851038619, T3F, T3y);
Chris@82 585 T3Y = FMA(KP851038619, T3F, T3y);
Chris@82 586 T3S = FMA(KP912018591, T3x, T3u);
Chris@82 587 T3T = FMA(KP912575812, T3E, T3B);
Chris@82 588 }
Chris@82 589 {
Chris@82 590 E T3I, T3J, T3K, T3L, T3M, T3N;
Chris@82 591 T3I = FMA(KP126329378, T3s, T3t);
Chris@82 592 T3J = FMA(KP470564281, T3v, T3w);
Chris@82 593 T3K = FMA(KP912018591, T3J, T3I);
Chris@82 594 T3L = FNMS(KP634619297, T3z, T3A);
Chris@82 595 T3M = FMA(KP827271945, T3C, T3D);
Chris@82 596 T3N = FMA(KP912575812, T3M, T3L);
Chris@82 597 T3O = FMA(KP851038619, T3N, T3K);
Chris@82 598 T41 = FNMS(KP912018591, T3J, T3I);
Chris@82 599 T3Q = FNMS(KP851038619, T3N, T3K);
Chris@82 600 T40 = FNMS(KP912575812, T3M, T3L);
Chris@82 601 }
Chris@82 602 ro[WS(os, 4)] = FNMS(KP992114701, T3G, T3r);
Chris@82 603 io[WS(os, 4)] = FNMS(KP992114701, T3O, T3H);
Chris@82 604 {
Chris@82 605 E T3U, T3W, T3R, T3V, T3P;
Chris@82 606 T3U = FNMS(KP525970792, T3T, T3S);
Chris@82 607 T3W = FMA(KP726211448, T3S, T3T);
Chris@82 608 T3P = FMA(KP248028675, T3O, T3H);
Chris@82 609 T3R = FNMS(KP554608978, T3Q, T3P);
Chris@82 610 T3V = FMA(KP554608978, T3Q, T3P);
Chris@82 611 io[WS(os, 14)] = FMA(KP943557151, T3U, T3R);
Chris@82 612 io[WS(os, 24)] = FMA(KP803003575, T3W, T3V);
Chris@82 613 io[WS(os, 19)] = FNMS(KP943557151, T3U, T3R);
Chris@82 614 io[WS(os, 9)] = FNMS(KP803003575, T3W, T3V);
Chris@82 615 }
Chris@82 616 {
Chris@82 617 E T42, T44, T3Z, T43, T3X;
Chris@82 618 T42 = FNMS(KP726211448, T41, T40);
Chris@82 619 T44 = FMA(KP525970792, T40, T41);
Chris@82 620 T3X = FMA(KP248028675, T3G, T3r);
Chris@82 621 T3Z = FMA(KP554608978, T3Y, T3X);
Chris@82 622 T43 = FNMS(KP554608978, T3Y, T3X);
Chris@82 623 ro[WS(os, 9)] = FNMS(KP803003575, T42, T3Z);
Chris@82 624 ro[WS(os, 19)] = FMA(KP943557151, T44, T43);
Chris@82 625 ro[WS(os, 24)] = FMA(KP803003575, T42, T3Z);
Chris@82 626 ro[WS(os, 14)] = FNMS(KP943557151, T44, T43);
Chris@82 627 }
Chris@82 628 }
Chris@82 629 }
Chris@82 630 }
Chris@82 631 }
Chris@82 632
/*
 * Descriptor passed to kdft_register together with n1_25.  25 and "n1_25"
 * match the -n / -name options on the "Generated by" line; the first three
 * entries of {84, 0, 268, 0} match the "84 additions, 0 multiplications,
 * 268 fused multiply/add" count quoted in the FMA variant's header comment.
 * NOTE(review): the meaning of the fourth count, &GENUS and the trailing
 * zeros is not visible in this file -- confirm against the kdft_desc
 * declaration.
 */
Chris@82 633 static const kdft_desc desc = { 25, "n1_25", {84, 0, 268, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 634
/*
 * Registration entry point for this codelet: hands n1_25 and its descriptor
 * to kdft_register for planner p.  X() is a macro defined outside this file
 * (presumably a symbol-name prefix -- confirm against the project headers).
 */
Chris@82 635 void X(codelet_n1_25) (planner *p) {
Chris@82 636 X(kdft_register) (p, n1_25, &desc);
Chris@82 637 }
Chris@82 638
Chris@82 639 #else
Chris@82 640
Chris@82 641 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 25 -name n1_25 -include dft/scalar/n.h */
Chris@82 642
Chris@82 643 /*
Chris@82 644 * This function contains 352 FP additions, 184 FP multiplications,
Chris@82 645 * (or, 260 additions, 92 multiplications, 92 fused multiply/add),
Chris@82 646 * 101 stack variables, 20 constants, and 100 memory accesses
Chris@82 647 */
Chris@82 648 #include "dft/scalar/n.h"
Chris@82 649
Chris@82 650 static void n1_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 651 {
Chris@82 652 DK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 653 DK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 654 DK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 655 DK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 656 DK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 657 DK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 658 DK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 659 DK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 660 DK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 661 DK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 662 DK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 663 DK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 664 DK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 665 DK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 666 DK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 667 DK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 668 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 669 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 670 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 671 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 672 {
Chris@82 673 INT i;
Chris@82 674 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(100, is), MAKE_VOLATILE_STRIDE(100, os)) {
Chris@82 675 E T9, T4u, T2T, TP, T3H, TW, T5y, T3I, T2Q, T4v, Ti, Tr, Ts, T5m, T5n;
Chris@82 676 E T5v, T18, T4G, T34, T3M, T1G, T4J, T38, T3T, T1v, T4K, T37, T3W, T1j, T4H;
Chris@82 677 E T35, T3P, TB, TK, TL, T5p, T5q, T5w, T1T, T4N, T3c, T41, T2r, T4Q, T3e;
Chris@82 678 E T4b, T2g, T4R, T3f, T48, T24, T4O, T3b, T44;
Chris@82 679 {
Chris@82 680 E T1, T4, T7, T8, T2S, T2R, TN, TO;
Chris@82 681 T1 = ri[0];
Chris@82 682 {
Chris@82 683 E T2, T3, T5, T6;
Chris@82 684 T2 = ri[WS(is, 5)];
Chris@82 685 T3 = ri[WS(is, 20)];
Chris@82 686 T4 = T2 + T3;
Chris@82 687 T5 = ri[WS(is, 10)];
Chris@82 688 T6 = ri[WS(is, 15)];
Chris@82 689 T7 = T5 + T6;
Chris@82 690 T8 = T4 + T7;
Chris@82 691 T2S = T5 - T6;
Chris@82 692 T2R = T2 - T3;
Chris@82 693 }
Chris@82 694 T9 = T1 + T8;
Chris@82 695 T4u = FNMS(KP587785252, T2R, KP951056516 * T2S);
Chris@82 696 T2T = FMA(KP951056516, T2R, KP587785252 * T2S);
Chris@82 697 TN = KP559016994 * (T4 - T7);
Chris@82 698 TO = FNMS(KP250000000, T8, T1);
Chris@82 699 TP = TN + TO;
Chris@82 700 T3H = TO - TN;
Chris@82 701 }
Chris@82 702 {
Chris@82 703 E T2N, T2K, T2L, TS, T2O, TV, T2M, T2P;
Chris@82 704 T2N = ii[0];
Chris@82 705 {
Chris@82 706 E TQ, TR, TT, TU;
Chris@82 707 TQ = ii[WS(is, 5)];
Chris@82 708 TR = ii[WS(is, 20)];
Chris@82 709 T2K = TQ + TR;
Chris@82 710 TT = ii[WS(is, 10)];
Chris@82 711 TU = ii[WS(is, 15)];
Chris@82 712 T2L = TT + TU;
Chris@82 713 TS = TQ - TR;
Chris@82 714 T2O = T2K + T2L;
Chris@82 715 TV = TT - TU;
Chris@82 716 }
Chris@82 717 TW = FMA(KP951056516, TS, KP587785252 * TV);
Chris@82 718 T5y = T2N + T2O;
Chris@82 719 T3I = FNMS(KP587785252, TS, KP951056516 * TV);
Chris@82 720 T2M = KP559016994 * (T2K - T2L);
Chris@82 721 T2P = FNMS(KP250000000, T2O, T2N);
Chris@82 722 T2Q = T2M + T2P;
Chris@82 723 T4v = T2P - T2M;
Chris@82 724 }
Chris@82 725 {
Chris@82 726 E Ta, T1c, Tj, T1z, Th, T1h, TY, T1g, T13, T1d, T16, T1b, Tq, T1E, T1l;
Chris@82 727 E T1D, T1q, T1A, T1t, T1y;
Chris@82 728 Ta = ri[WS(is, 1)];
Chris@82 729 T1c = ii[WS(is, 1)];
Chris@82 730 Tj = ri[WS(is, 4)];
Chris@82 731 T1z = ii[WS(is, 4)];
Chris@82 732 {
Chris@82 733 E Tb, Tc, Td, Te, Tf, Tg;
Chris@82 734 Tb = ri[WS(is, 6)];
Chris@82 735 Tc = ri[WS(is, 21)];
Chris@82 736 Td = Tb + Tc;
Chris@82 737 Te = ri[WS(is, 11)];
Chris@82 738 Tf = ri[WS(is, 16)];
Chris@82 739 Tg = Te + Tf;
Chris@82 740 Th = Td + Tg;
Chris@82 741 T1h = Te - Tf;
Chris@82 742 TY = KP559016994 * (Td - Tg);
Chris@82 743 T1g = Tb - Tc;
Chris@82 744 }
Chris@82 745 {
Chris@82 746 E T11, T12, T19, T14, T15, T1a;
Chris@82 747 T11 = ii[WS(is, 6)];
Chris@82 748 T12 = ii[WS(is, 21)];
Chris@82 749 T19 = T11 + T12;
Chris@82 750 T14 = ii[WS(is, 11)];
Chris@82 751 T15 = ii[WS(is, 16)];
Chris@82 752 T1a = T14 + T15;
Chris@82 753 T13 = T11 - T12;
Chris@82 754 T1d = T19 + T1a;
Chris@82 755 T16 = T14 - T15;
Chris@82 756 T1b = KP559016994 * (T19 - T1a);
Chris@82 757 }
Chris@82 758 {
Chris@82 759 E Tk, Tl, Tm, Tn, To, Tp;
Chris@82 760 Tk = ri[WS(is, 9)];
Chris@82 761 Tl = ri[WS(is, 24)];
Chris@82 762 Tm = Tk + Tl;
Chris@82 763 Tn = ri[WS(is, 14)];
Chris@82 764 To = ri[WS(is, 19)];
Chris@82 765 Tp = Tn + To;
Chris@82 766 Tq = Tm + Tp;
Chris@82 767 T1E = Tn - To;
Chris@82 768 T1l = KP559016994 * (Tm - Tp);
Chris@82 769 T1D = Tk - Tl;
Chris@82 770 }
Chris@82 771 {
Chris@82 772 E T1o, T1p, T1w, T1r, T1s, T1x;
Chris@82 773 T1o = ii[WS(is, 9)];
Chris@82 774 T1p = ii[WS(is, 24)];
Chris@82 775 T1w = T1o + T1p;
Chris@82 776 T1r = ii[WS(is, 14)];
Chris@82 777 T1s = ii[WS(is, 19)];
Chris@82 778 T1x = T1r + T1s;
Chris@82 779 T1q = T1o - T1p;
Chris@82 780 T1A = T1w + T1x;
Chris@82 781 T1t = T1r - T1s;
Chris@82 782 T1y = KP559016994 * (T1w - T1x);
Chris@82 783 }
Chris@82 784 Ti = Ta + Th;
Chris@82 785 Tr = Tj + Tq;
Chris@82 786 Ts = Ti + Tr;
Chris@82 787 T5m = T1c + T1d;
Chris@82 788 T5n = T1z + T1A;
Chris@82 789 T5v = T5m + T5n;
Chris@82 790 {
Chris@82 791 E T17, T3L, T10, T3K, TZ;
Chris@82 792 T17 = FMA(KP951056516, T13, KP587785252 * T16);
Chris@82 793 T3L = FNMS(KP587785252, T13, KP951056516 * T16);
Chris@82 794 TZ = FNMS(KP250000000, Th, Ta);
Chris@82 795 T10 = TY + TZ;
Chris@82 796 T3K = TZ - TY;
Chris@82 797 T18 = T10 + T17;
Chris@82 798 T4G = T3K + T3L;
Chris@82 799 T34 = T10 - T17;
Chris@82 800 T3M = T3K - T3L;
Chris@82 801 }
Chris@82 802 {
Chris@82 803 E T1F, T3R, T1C, T3S, T1B;
Chris@82 804 T1F = FMA(KP951056516, T1D, KP587785252 * T1E);
Chris@82 805 T3R = FNMS(KP587785252, T1D, KP951056516 * T1E);
Chris@82 806 T1B = FNMS(KP250000000, T1A, T1z);
Chris@82 807 T1C = T1y + T1B;
Chris@82 808 T3S = T1B - T1y;
Chris@82 809 T1G = T1C - T1F;
Chris@82 810 T4J = T3S - T3R;
Chris@82 811 T38 = T1F + T1C;
Chris@82 812 T3T = T3R + T3S;
Chris@82 813 }
Chris@82 814 {
Chris@82 815 E T1u, T3V, T1n, T3U, T1m;
Chris@82 816 T1u = FMA(KP951056516, T1q, KP587785252 * T1t);
Chris@82 817 T3V = FNMS(KP587785252, T1q, KP951056516 * T1t);
Chris@82 818 T1m = FNMS(KP250000000, Tq, Tj);
Chris@82 819 T1n = T1l + T1m;
Chris@82 820 T3U = T1m - T1l;
Chris@82 821 T1v = T1n + T1u;
Chris@82 822 T4K = T3U + T3V;
Chris@82 823 T37 = T1n - T1u;
Chris@82 824 T3W = T3U - T3V;
Chris@82 825 }
Chris@82 826 {
Chris@82 827 E T1i, T3N, T1f, T3O, T1e;
Chris@82 828 T1i = FMA(KP951056516, T1g, KP587785252 * T1h);
Chris@82 829 T3N = FNMS(KP587785252, T1g, KP951056516 * T1h);
Chris@82 830 T1e = FNMS(KP250000000, T1d, T1c);
Chris@82 831 T1f = T1b + T1e;
Chris@82 832 T3O = T1e - T1b;
Chris@82 833 T1j = T1f - T1i;
Chris@82 834 T4H = T3O - T3N;
Chris@82 835 T35 = T1i + T1f;
Chris@82 836 T3P = T3N + T3O;
Chris@82 837 }
Chris@82 838 }
Chris@82 839 {
Chris@82 840 E Tt, T1X, TC, T2k, TA, T22, T1J, T21, T1O, T1Y, T1R, T1W, TJ, T2p, T26;
Chris@82 841 E T2o, T2b, T2l, T2e, T2j;
Chris@82 842 Tt = ri[WS(is, 2)];
Chris@82 843 T1X = ii[WS(is, 2)];
Chris@82 844 TC = ri[WS(is, 3)];
Chris@82 845 T2k = ii[WS(is, 3)];
Chris@82 846 {
Chris@82 847 E Tu, Tv, Tw, Tx, Ty, Tz;
Chris@82 848 Tu = ri[WS(is, 7)];
Chris@82 849 Tv = ri[WS(is, 22)];
Chris@82 850 Tw = Tu + Tv;
Chris@82 851 Tx = ri[WS(is, 12)];
Chris@82 852 Ty = ri[WS(is, 17)];
Chris@82 853 Tz = Tx + Ty;
Chris@82 854 TA = Tw + Tz;
Chris@82 855 T22 = Tx - Ty;
Chris@82 856 T1J = KP559016994 * (Tw - Tz);
Chris@82 857 T21 = Tu - Tv;
Chris@82 858 }
Chris@82 859 {
Chris@82 860 E T1M, T1N, T1U, T1P, T1Q, T1V;
Chris@82 861 T1M = ii[WS(is, 7)];
Chris@82 862 T1N = ii[WS(is, 22)];
Chris@82 863 T1U = T1M + T1N;
Chris@82 864 T1P = ii[WS(is, 12)];
Chris@82 865 T1Q = ii[WS(is, 17)];
Chris@82 866 T1V = T1P + T1Q;
Chris@82 867 T1O = T1M - T1N;
Chris@82 868 T1Y = T1U + T1V;
Chris@82 869 T1R = T1P - T1Q;
Chris@82 870 T1W = KP559016994 * (T1U - T1V);
Chris@82 871 }
Chris@82 872 {
Chris@82 873 E TD, TE, TF, TG, TH, TI;
Chris@82 874 TD = ri[WS(is, 8)];
Chris@82 875 TE = ri[WS(is, 23)];
Chris@82 876 TF = TD + TE;
Chris@82 877 TG = ri[WS(is, 13)];
Chris@82 878 TH = ri[WS(is, 18)];
Chris@82 879 TI = TG + TH;
Chris@82 880 TJ = TF + TI;
Chris@82 881 T2p = TG - TH;
Chris@82 882 T26 = KP559016994 * (TF - TI);
Chris@82 883 T2o = TD - TE;
Chris@82 884 }
Chris@82 885 {
Chris@82 886 E T29, T2a, T2h, T2c, T2d, T2i;
Chris@82 887 T29 = ii[WS(is, 8)];
Chris@82 888 T2a = ii[WS(is, 23)];
Chris@82 889 T2h = T29 + T2a;
Chris@82 890 T2c = ii[WS(is, 13)];
Chris@82 891 T2d = ii[WS(is, 18)];
Chris@82 892 T2i = T2c + T2d;
Chris@82 893 T2b = T29 - T2a;
Chris@82 894 T2l = T2h + T2i;
Chris@82 895 T2e = T2c - T2d;
Chris@82 896 T2j = KP559016994 * (T2h - T2i);
Chris@82 897 }
Chris@82 898 TB = Tt + TA;
Chris@82 899 TK = TC + TJ;
Chris@82 900 TL = TB + TK;
Chris@82 901 T5p = T1X + T1Y;
Chris@82 902 T5q = T2k + T2l;
Chris@82 903 T5w = T5p + T5q;
Chris@82 904 {
Chris@82 905 E T1S, T40, T1L, T3Z, T1K;
Chris@82 906 T1S = FMA(KP951056516, T1O, KP587785252 * T1R);
Chris@82 907 T40 = FNMS(KP587785252, T1O, KP951056516 * T1R);
Chris@82 908 T1K = FNMS(KP250000000, TA, Tt);
Chris@82 909 T1L = T1J + T1K;
Chris@82 910 T3Z = T1K - T1J;
Chris@82 911 T1T = T1L + T1S;
Chris@82 912 T4N = T3Z + T40;
Chris@82 913 T3c = T1L - T1S;
Chris@82 914 T41 = T3Z - T40;
Chris@82 915 }
Chris@82 916 {
Chris@82 917 E T2q, T49, T2n, T4a, T2m;
Chris@82 918 T2q = FMA(KP951056516, T2o, KP587785252 * T2p);
Chris@82 919 T49 = FNMS(KP587785252, T2o, KP951056516 * T2p);
Chris@82 920 T2m = FNMS(KP250000000, T2l, T2k);
Chris@82 921 T2n = T2j + T2m;
Chris@82 922 T4a = T2m - T2j;
Chris@82 923 T2r = T2n - T2q;
Chris@82 924 T4Q = T4a - T49;
Chris@82 925 T3e = T2q + T2n;
Chris@82 926 T4b = T49 + T4a;
Chris@82 927 }
Chris@82 928 {
Chris@82 929 E T2f, T47, T28, T46, T27;
Chris@82 930 T2f = FMA(KP951056516, T2b, KP587785252 * T2e);
Chris@82 931 T47 = FNMS(KP587785252, T2b, KP951056516 * T2e);
Chris@82 932 T27 = FNMS(KP250000000, TJ, TC);
Chris@82 933 T28 = T26 + T27;
Chris@82 934 T46 = T27 - T26;
Chris@82 935 T2g = T28 + T2f;
Chris@82 936 T4R = T46 + T47;
Chris@82 937 T3f = T28 - T2f;
Chris@82 938 T48 = T46 - T47;
Chris@82 939 }
Chris@82 940 {
Chris@82 941 E T23, T42, T20, T43, T1Z;
Chris@82 942 T23 = FMA(KP951056516, T21, KP587785252 * T22);
Chris@82 943 T42 = FNMS(KP587785252, T21, KP951056516 * T22);
Chris@82 944 T1Z = FNMS(KP250000000, T1Y, T1X);
Chris@82 945 T20 = T1W + T1Z;
Chris@82 946 T43 = T1Z - T1W;
Chris@82 947 T24 = T20 - T23;
Chris@82 948 T4O = T43 - T42;
Chris@82 949 T3b = T23 + T20;
Chris@82 950 T44 = T42 + T43;
Chris@82 951 }
Chris@82 952 }
Chris@82 953 {
Chris@82 954 E T5j, TM, T5k, T5s, T5u, T5o, T5r, T5t, T5l;
Chris@82 955 T5j = KP559016994 * (Ts - TL);
Chris@82 956 TM = Ts + TL;
Chris@82 957 T5k = FNMS(KP250000000, TM, T9);
Chris@82 958 T5o = T5m - T5n;
Chris@82 959 T5r = T5p - T5q;
Chris@82 960 T5s = FMA(KP951056516, T5o, KP587785252 * T5r);
Chris@82 961 T5u = FNMS(KP587785252, T5o, KP951056516 * T5r);
Chris@82 962 ro[0] = T9 + TM;
Chris@82 963 T5t = T5k - T5j;
Chris@82 964 ro[WS(os, 10)] = T5t - T5u;
Chris@82 965 ro[WS(os, 15)] = T5t + T5u;
Chris@82 966 T5l = T5j + T5k;
Chris@82 967 ro[WS(os, 20)] = T5l - T5s;
Chris@82 968 ro[WS(os, 5)] = T5l + T5s;
Chris@82 969 }
Chris@82 970 {
Chris@82 971 E T5x, T5z, T5A, T5E, T5F, T5C, T5D, T5G, T5B;
Chris@82 972 T5x = KP559016994 * (T5v - T5w);
Chris@82 973 T5z = T5v + T5w;
Chris@82 974 T5A = FNMS(KP250000000, T5z, T5y);
Chris@82 975 T5C = Ti - Tr;
Chris@82 976 T5D = TB - TK;
Chris@82 977 T5E = FMA(KP951056516, T5C, KP587785252 * T5D);
Chris@82 978 T5F = FNMS(KP587785252, T5C, KP951056516 * T5D);
Chris@82 979 io[0] = T5y + T5z;
Chris@82 980 T5G = T5A - T5x;
Chris@82 981 io[WS(os, 10)] = T5F + T5G;
Chris@82 982 io[WS(os, 15)] = T5G - T5F;
Chris@82 983 T5B = T5x + T5A;
Chris@82 984 io[WS(os, 5)] = T5B - T5E;
Chris@82 985 io[WS(os, 20)] = T5E + T5B;
Chris@82 986 }
Chris@82 987 {
Chris@82 988 E TX, T2U, T2u, T2Z, T2v, T2Y, T2A, T2V, T2D, T2J;
Chris@82 989 TX = TP + TW;
Chris@82 990 T2U = T2Q - T2T;
Chris@82 991 {
Chris@82 992 E T1k, T1H, T1I, T25, T2s, T2t;
Chris@82 993 T1k = FMA(KP968583161, T18, KP248689887 * T1j);
Chris@82 994 T1H = FMA(KP535826794, T1v, KP844327925 * T1G);
Chris@82 995 T1I = T1k + T1H;
Chris@82 996 T25 = FMA(KP876306680, T1T, KP481753674 * T24);
Chris@82 997 T2s = FMA(KP728968627, T2g, KP684547105 * T2r);
Chris@82 998 T2t = T25 + T2s;
Chris@82 999 T2u = T1I + T2t;
Chris@82 1000 T2Z = T25 - T2s;
Chris@82 1001 T2v = KP559016994 * (T1I - T2t);
Chris@82 1002 T2Y = T1k - T1H;
Chris@82 1003 }
Chris@82 1004 {
Chris@82 1005 E T2y, T2z, T2H, T2B, T2C, T2I;
Chris@82 1006 T2y = FNMS(KP248689887, T18, KP968583161 * T1j);
Chris@82 1007 T2z = FNMS(KP844327925, T1v, KP535826794 * T1G);
Chris@82 1008 T2H = T2y + T2z;
Chris@82 1009 T2B = FNMS(KP481753674, T1T, KP876306680 * T24);
Chris@82 1010 T2C = FNMS(KP684547105, T2g, KP728968627 * T2r);
Chris@82 1011 T2I = T2B + T2C;
Chris@82 1012 T2A = T2y - T2z;
Chris@82 1013 T2V = T2H + T2I;
Chris@82 1014 T2D = T2B - T2C;
Chris@82 1015 T2J = KP559016994 * (T2H - T2I);
Chris@82 1016 }
Chris@82 1017 ro[WS(os, 1)] = TX + T2u;
Chris@82 1018 io[WS(os, 1)] = T2U + T2V;
Chris@82 1019 {
Chris@82 1020 E T2E, T2G, T2x, T2F, T2w;
Chris@82 1021 T2E = FMA(KP951056516, T2A, KP587785252 * T2D);
Chris@82 1022 T2G = FNMS(KP587785252, T2A, KP951056516 * T2D);
Chris@82 1023 T2w = FNMS(KP250000000, T2u, TX);
Chris@82 1024 T2x = T2v + T2w;
Chris@82 1025 T2F = T2w - T2v;
Chris@82 1026 ro[WS(os, 21)] = T2x - T2E;
Chris@82 1027 ro[WS(os, 16)] = T2F + T2G;
Chris@82 1028 ro[WS(os, 6)] = T2x + T2E;
Chris@82 1029 ro[WS(os, 11)] = T2F - T2G;
Chris@82 1030 }
Chris@82 1031 {
Chris@82 1032 E T30, T31, T2X, T32, T2W;
Chris@82 1033 T30 = FMA(KP951056516, T2Y, KP587785252 * T2Z);
Chris@82 1034 T31 = FNMS(KP587785252, T2Y, KP951056516 * T2Z);
Chris@82 1035 T2W = FNMS(KP250000000, T2V, T2U);
Chris@82 1036 T2X = T2J + T2W;
Chris@82 1037 T32 = T2W - T2J;
Chris@82 1038 io[WS(os, 6)] = T2X - T30;
Chris@82 1039 io[WS(os, 16)] = T32 - T31;
Chris@82 1040 io[WS(os, 21)] = T30 + T2X;
Chris@82 1041 io[WS(os, 11)] = T31 + T32;
Chris@82 1042 }
Chris@82 1043 }
Chris@82 1044 {
Chris@82 1045 E T4F, T52, T4U, T5b, T56, T57, T51, T5f, T53, T5e;
Chris@82 1046 T4F = T3H + T3I;
Chris@82 1047 T52 = T4v - T4u;
Chris@82 1048 {
Chris@82 1049 E T4I, T4L, T4M, T4P, T4S, T4T;
Chris@82 1050 T4I = FMA(KP728968627, T4G, KP684547105 * T4H);
Chris@82 1051 T4L = FNMS(KP992114701, T4K, KP125333233 * T4J);
Chris@82 1052 T4M = T4I + T4L;
Chris@82 1053 T4P = FMA(KP062790519, T4N, KP998026728 * T4O);
Chris@82 1054 T4S = FNMS(KP637423989, T4R, KP770513242 * T4Q);
Chris@82 1055 T4T = T4P + T4S;
Chris@82 1056 T4U = T4M + T4T;
Chris@82 1057 T5b = KP559016994 * (T4M - T4T);
Chris@82 1058 T56 = T4I - T4L;
Chris@82 1059 T57 = T4P - T4S;
Chris@82 1060 }
Chris@82 1061 {
Chris@82 1062 E T4V, T4W, T4X, T4Y, T4Z, T50;
Chris@82 1063 T4V = FNMS(KP684547105, T4G, KP728968627 * T4H);
Chris@82 1064 T4W = FMA(KP125333233, T4K, KP992114701 * T4J);
Chris@82 1065 T4X = T4V - T4W;
Chris@82 1066 T4Y = FNMS(KP998026728, T4N, KP062790519 * T4O);
Chris@82 1067 T4Z = FMA(KP770513242, T4R, KP637423989 * T4Q);
Chris@82 1068 T50 = T4Y - T4Z;
Chris@82 1069 T51 = KP559016994 * (T4X - T50);
Chris@82 1070 T5f = T4Y + T4Z;
Chris@82 1071 T53 = T4X + T50;
Chris@82 1072 T5e = T4V + T4W;
Chris@82 1073 }
Chris@82 1074 ro[WS(os, 3)] = T4F + T4U;
Chris@82 1075 io[WS(os, 3)] = T52 + T53;
Chris@82 1076 {
Chris@82 1077 E T58, T59, T55, T5a, T54;
Chris@82 1078 T58 = FMA(KP951056516, T56, KP587785252 * T57);
Chris@82 1079 T59 = FNMS(KP587785252, T56, KP951056516 * T57);
Chris@82 1080 T54 = FNMS(KP250000000, T53, T52);
Chris@82 1081 T55 = T51 + T54;
Chris@82 1082 T5a = T54 - T51;
Chris@82 1083 io[WS(os, 8)] = T55 - T58;
Chris@82 1084 io[WS(os, 18)] = T5a - T59;
Chris@82 1085 io[WS(os, 23)] = T58 + T55;
Chris@82 1086 io[WS(os, 13)] = T59 + T5a;
Chris@82 1087 }
Chris@82 1088 {
Chris@82 1089 E T5g, T5i, T5d, T5h, T5c;
Chris@82 1090 T5g = FMA(KP951056516, T5e, KP587785252 * T5f);
Chris@82 1091 T5i = FNMS(KP587785252, T5e, KP951056516 * T5f);
Chris@82 1092 T5c = FNMS(KP250000000, T4U, T4F);
Chris@82 1093 T5d = T5b + T5c;
Chris@82 1094 T5h = T5c - T5b;
Chris@82 1095 ro[WS(os, 23)] = T5d - T5g;
Chris@82 1096 ro[WS(os, 18)] = T5h + T5i;
Chris@82 1097 ro[WS(os, 8)] = T5d + T5g;
Chris@82 1098 ro[WS(os, 13)] = T5h - T5i;
Chris@82 1099 }
Chris@82 1100 }
Chris@82 1101 {
Chris@82 1102 E T3J, T4w, T4e, T4B, T4f, T4A, T4k, T4x, T4n, T4t;
Chris@82 1103 T3J = T3H - T3I;
Chris@82 1104 T4w = T4u + T4v;
Chris@82 1105 {
Chris@82 1106 E T3Q, T3X, T3Y, T45, T4c, T4d;
Chris@82 1107 T3Q = FMA(KP876306680, T3M, KP481753674 * T3P);
Chris@82 1108 T3X = FNMS(KP425779291, T3W, KP904827052 * T3T);
Chris@82 1109 T3Y = T3Q + T3X;
Chris@82 1110 T45 = FMA(KP535826794, T41, KP844327925 * T44);
Chris@82 1111 T4c = FMA(KP062790519, T48, KP998026728 * T4b);
Chris@82 1112 T4d = T45 + T4c;
Chris@82 1113 T4e = T3Y + T4d;
Chris@82 1114 T4B = T45 - T4c;
Chris@82 1115 T4f = KP559016994 * (T3Y - T4d);
Chris@82 1116 T4A = T3Q - T3X;
Chris@82 1117 }
Chris@82 1118 {
Chris@82 1119 E T4i, T4j, T4r, T4l, T4m, T4s;
Chris@82 1120 T4i = FNMS(KP481753674, T3M, KP876306680 * T3P);
Chris@82 1121 T4j = FMA(KP904827052, T3W, KP425779291 * T3T);
Chris@82 1122 T4r = T4i - T4j;
Chris@82 1123 T4l = FNMS(KP844327925, T41, KP535826794 * T44);
Chris@82 1124 T4m = FNMS(KP998026728, T48, KP062790519 * T4b);
Chris@82 1125 T4s = T4l + T4m;
Chris@82 1126 T4k = T4i + T4j;
Chris@82 1127 T4x = T4r + T4s;
Chris@82 1128 T4n = T4l - T4m;
Chris@82 1129 T4t = KP559016994 * (T4r - T4s);
Chris@82 1130 }
Chris@82 1131 ro[WS(os, 2)] = T3J + T4e;
Chris@82 1132 io[WS(os, 2)] = T4w + T4x;
Chris@82 1133 {
Chris@82 1134 E T4o, T4q, T4h, T4p, T4g;
Chris@82 1135 T4o = FMA(KP951056516, T4k, KP587785252 * T4n);
Chris@82 1136 T4q = FNMS(KP587785252, T4k, KP951056516 * T4n);
Chris@82 1137 T4g = FNMS(KP250000000, T4e, T3J);
Chris@82 1138 T4h = T4f + T4g;
Chris@82 1139 T4p = T4g - T4f;
Chris@82 1140 ro[WS(os, 22)] = T4h - T4o;
Chris@82 1141 ro[WS(os, 17)] = T4p + T4q;
Chris@82 1142 ro[WS(os, 7)] = T4h + T4o;
Chris@82 1143 ro[WS(os, 12)] = T4p - T4q;
Chris@82 1144 }
Chris@82 1145 {
Chris@82 1146 E T4C, T4D, T4z, T4E, T4y;
Chris@82 1147 T4C = FMA(KP951056516, T4A, KP587785252 * T4B);
Chris@82 1148 T4D = FNMS(KP587785252, T4A, KP951056516 * T4B);
Chris@82 1149 T4y = FNMS(KP250000000, T4x, T4w);
Chris@82 1150 T4z = T4t + T4y;
Chris@82 1151 T4E = T4y - T4t;
Chris@82 1152 io[WS(os, 7)] = T4z - T4C;
Chris@82 1153 io[WS(os, 17)] = T4E - T4D;
Chris@82 1154 io[WS(os, 22)] = T4C + T4z;
Chris@82 1155 io[WS(os, 12)] = T4D + T4E;
Chris@82 1156 }
Chris@82 1157 }
Chris@82 1158 {
Chris@82 1159 E T33, T3j, T3i, T3z, T3r, T3s, T3q, T3D, T3v, T3C;
Chris@82 1160 T33 = TP - TW;
Chris@82 1161 T3j = T2T + T2Q;
Chris@82 1162 {
Chris@82 1163 E T36, T39, T3a, T3d, T3g, T3h;
Chris@82 1164 T36 = FMA(KP535826794, T34, KP844327925 * T35);
Chris@82 1165 T39 = FMA(KP637423989, T37, KP770513242 * T38);
Chris@82 1166 T3a = T36 - T39;
Chris@82 1167 T3d = FNMS(KP425779291, T3c, KP904827052 * T3b);
Chris@82 1168 T3g = FNMS(KP992114701, T3f, KP125333233 * T3e);
Chris@82 1169 T3h = T3d + T3g;
Chris@82 1170 T3i = T3a + T3h;
Chris@82 1171 T3z = KP559016994 * (T3a - T3h);
Chris@82 1172 T3r = T3d - T3g;
Chris@82 1173 T3s = T36 + T39;
Chris@82 1174 }
Chris@82 1175 {
Chris@82 1176 E T3k, T3l, T3m, T3n, T3o, T3p;
Chris@82 1177 T3k = FNMS(KP844327925, T34, KP535826794 * T35);
Chris@82 1178 T3l = FNMS(KP637423989, T38, KP770513242 * T37);
Chris@82 1179 T3m = T3k + T3l;
Chris@82 1180 T3n = FMA(KP904827052, T3c, KP425779291 * T3b);
Chris@82 1181 T3o = FMA(KP125333233, T3f, KP992114701 * T3e);
Chris@82 1182 T3p = T3n + T3o;
Chris@82 1183 T3q = T3m - T3p;
Chris@82 1184 T3D = T3o - T3n;
Chris@82 1185 T3v = KP559016994 * (T3m + T3p);
Chris@82 1186 T3C = T3k - T3l;
Chris@82 1187 }
Chris@82 1188 ro[WS(os, 4)] = T33 + T3i;
Chris@82 1189 io[WS(os, 4)] = T3j + T3q;
Chris@82 1190 {
Chris@82 1191 E T3t, T3y, T3w, T3x, T3u;
Chris@82 1192 T3t = FNMS(KP587785252, T3s, KP951056516 * T3r);
Chris@82 1193 T3y = FMA(KP951056516, T3s, KP587785252 * T3r);
Chris@82 1194 T3u = FNMS(KP250000000, T3q, T3j);
Chris@82 1195 T3w = T3u - T3v;
Chris@82 1196 T3x = T3u + T3v;
Chris@82 1197 io[WS(os, 14)] = T3t + T3w;
Chris@82 1198 io[WS(os, 24)] = T3y + T3x;
Chris@82 1199 io[WS(os, 19)] = T3w - T3t;
Chris@82 1200 io[WS(os, 9)] = T3x - T3y;
Chris@82 1201 }
Chris@82 1202 {
Chris@82 1203 E T3E, T3G, T3B, T3F, T3A;
Chris@82 1204 T3E = FMA(KP951056516, T3C, KP587785252 * T3D);
Chris@82 1205 T3G = FNMS(KP587785252, T3C, KP951056516 * T3D);
Chris@82 1206 T3A = FNMS(KP250000000, T3i, T33);
Chris@82 1207 T3B = T3z + T3A;
Chris@82 1208 T3F = T3A - T3z;
Chris@82 1209 ro[WS(os, 24)] = T3B - T3E;
Chris@82 1210 ro[WS(os, 19)] = T3F + T3G;
Chris@82 1211 ro[WS(os, 9)] = T3B + T3E;
Chris@82 1212 ro[WS(os, 14)] = T3F - T3G;
Chris@82 1213 }
Chris@82 1214 }
Chris@82 1215 }
Chris@82 1216 }
Chris@82 1217 }
Chris@82 1218
/*
 * Descriptor that is passed, together with the n1_25 codelet function, to
 * X(kdft_register) by the registration routine in this file.
 * The leading 25 and the "n1_25" string line up with the generator options
 * "-n 25 -name n1_25" recorded in this file's header, so they are presumably
 * the transform size and the codelet name.
 * NOTE(review): {260, 92, 92, 0} looks like an operation-count record
 * (adds, multiplies, fused multiply-adds, other) -- confirm against the
 * kdft_desc declaration. GENUS and the four trailing zero fields are declared
 * outside this section of the file; their meaning is not visible here.
 */
Chris@82 1219 static const kdft_desc desc = { 25, "n1_25", {260, 92, 92, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 1220
/*
 * Registration entry point for this codelet.
 * Forwards the planner pointer p, the n1_25 transform function and the
 * address of its descriptor (desc) to X(kdft_register); p is passed through
 * unchanged and nothing is returned.
 * NOTE(review): X(...) is presumably the project's symbol-prefixing macro --
 * confirm in the FFTW headers.
 */
Chris@82 1221 void X(codelet_n1_25) (planner *p) {
Chris@82 1222 X(kdft_register) (p, n1_25, &desc);
Chris@82 1223 }
Chris@82 1224
Chris@82 1225 #endif