annotate src/fftw-3.3.8/dft/scalar/codelets/n1_32.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:11 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name n1_32 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 372 FP additions, 136 FP multiplications,
Chris@82 32 * (or, 236 additions, 0 multiplications, 136 fused multiply/add),
Chris@82 33 * 100 stack variables, 7 constants, and 128 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_32, FMA variant (compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).  Per the generator command line
 * above (gen_notw -n 32) this is a size-32 kernel with no twiddle inputs.
 *
 * ri, ii  - input arrays (presumably real and imaginary parts); element k is
 *           read at ri[WS(is, k)] / ii[WS(is, k)] for k = 0..31.
 * ro, io  - output arrays; element k is written at ro[WS(os, k)] /
 *           io[WS(os, k)] for k = 0..31.
 * is, os  - input and output strides, applied through the WS() macro.
 * v       - number of passes of the loop body.
 * ivs,ovs - amounts added to the input / output pointers after each pass.
 *
 * DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from the included
 * project headers and are not defined in this file.
 */
Chris@82 37 static void n1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/*
 * Constants, by numeric value:
 *   KP980785280 = cos(pi/16)     KP198912367 = tan(pi/16)
 *   KP831469612 = cos(3*pi/16)   KP668178637 = tan(3*pi/16)
 *   KP923879532 = cos(pi/8)      KP414213562 = tan(pi/8) = sqrt(2) - 1
 *   KP707106781 = sqrt(1/2)
 */
Chris@82 39 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 40 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@82 41 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 42 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@82 43 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 44 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 45 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 46 {
Chris@82 47 INT i;
/* One pass per count in v: i counts down to 0 while ri/ii step by ivs and ro/io step by ovs. */
Chris@82 48 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) {
/* Intermediates produced by the six input blocks below and consumed by the eight output blocks. */
Chris@82 49 E T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G;
Chris@82 50 E T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W;
Chris@82 51 E T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5k, T2E;
Chris@82 52 E T3M, T4W, T5j, T2N, T3P, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D;
Chris@82 53 E T4G, T5h, T2d, T3F, T4L, T5g, T2m, T3I;
/* Input block: elements 0, 16, 8, 24 -- pairwise sums and differences. */
Chris@82 54 {
Chris@82 55 E T3, T1x, T14, T2R, T6, T2S, T17, T1y;
Chris@82 56 {
Chris@82 57 E T1, T2, T12, T13;
Chris@82 58 T1 = ri[0];
Chris@82 59 T2 = ri[WS(is, 16)];
Chris@82 60 T3 = T1 + T2;
Chris@82 61 T1x = T1 - T2;
Chris@82 62 T12 = ii[0];
Chris@82 63 T13 = ii[WS(is, 16)];
Chris@82 64 T14 = T12 + T13;
Chris@82 65 T2R = T12 - T13;
Chris@82 66 }
Chris@82 67 {
Chris@82 68 E T4, T5, T15, T16;
Chris@82 69 T4 = ri[WS(is, 8)];
Chris@82 70 T5 = ri[WS(is, 24)];
Chris@82 71 T6 = T4 + T5;
Chris@82 72 T2S = T4 - T5;
Chris@82 73 T15 = ii[WS(is, 8)];
Chris@82 74 T16 = ii[WS(is, 24)];
Chris@82 75 T17 = T15 + T16;
Chris@82 76 T1y = T15 - T16;
Chris@82 77 }
Chris@82 78 T7 = T3 + T6;
Chris@82 79 T4r = T3 - T6;
Chris@82 80 T4Z = T14 - T17;
Chris@82 81 T18 = T14 + T17;
Chris@82 82 T1z = T1x + T1y;
Chris@82 83 T3t = T1x - T1y;
Chris@82 84 T3T = T2S + T2R;
Chris@82 85 T2T = T2R - T2S;
Chris@82 86 }
/* Input block: elements 4, 20, 28, 12. */
Chris@82 87 {
Chris@82 88 E Ta, T1A, T1b, T1B, Td, T1D, T1e, T1E;
Chris@82 89 {
Chris@82 90 E T8, T9, T19, T1a;
Chris@82 91 T8 = ri[WS(is, 4)];
Chris@82 92 T9 = ri[WS(is, 20)];
Chris@82 93 Ta = T8 + T9;
Chris@82 94 T1A = T8 - T9;
Chris@82 95 T19 = ii[WS(is, 4)];
Chris@82 96 T1a = ii[WS(is, 20)];
Chris@82 97 T1b = T19 + T1a;
Chris@82 98 T1B = T19 - T1a;
Chris@82 99 }
Chris@82 100 {
Chris@82 101 E Tb, Tc, T1c, T1d;
Chris@82 102 Tb = ri[WS(is, 28)];
Chris@82 103 Tc = ri[WS(is, 12)];
Chris@82 104 Td = Tb + Tc;
Chris@82 105 T1D = Tb - Tc;
Chris@82 106 T1c = ii[WS(is, 28)];
Chris@82 107 T1d = ii[WS(is, 12)];
Chris@82 108 T1e = T1c + T1d;
Chris@82 109 T1E = T1c - T1d;
Chris@82 110 }
Chris@82 111 Te = Ta + Td;
Chris@82 112 T1f = T1b + T1e;
Chris@82 113 T50 = Td - Ta;
Chris@82 114 T4s = T1b - T1e;
Chris@82 115 {
Chris@82 116 E T2U, T2V, T1C, T1F;
Chris@82 117 T2U = T1B - T1A;
Chris@82 118 T2V = T1D + T1E;
Chris@82 119 T2W = T2U + T2V;
Chris@82 120 T3u = T2U - T2V;
Chris@82 121 T1C = T1A + T1B;
Chris@82 122 T1F = T1D - T1E;
Chris@82 123 T1G = T1C + T1F;
Chris@82 124 T3U = T1F - T1C;
Chris@82 125 }
Chris@82 126 }
/* Input block: elements 2, 18, 10, 26; differences are combined with KP414213562 via FMA/FNMS. */
Chris@82 127 {
Chris@82 128 E Ti, T1L, T1j, T1I, Tl, T1J, T1m, T1M, T1K, T1N;
Chris@82 129 {
Chris@82 130 E Tg, Th, T1h, T1i;
Chris@82 131 Tg = ri[WS(is, 2)];
Chris@82 132 Th = ri[WS(is, 18)];
Chris@82 133 Ti = Tg + Th;
Chris@82 134 T1L = Tg - Th;
Chris@82 135 T1h = ii[WS(is, 2)];
Chris@82 136 T1i = ii[WS(is, 18)];
Chris@82 137 T1j = T1h + T1i;
Chris@82 138 T1I = T1h - T1i;
Chris@82 139 }
Chris@82 140 {
Chris@82 141 E Tj, Tk, T1k, T1l;
Chris@82 142 Tj = ri[WS(is, 10)];
Chris@82 143 Tk = ri[WS(is, 26)];
Chris@82 144 Tl = Tj + Tk;
Chris@82 145 T1J = Tj - Tk;
Chris@82 146 T1k = ii[WS(is, 10)];
Chris@82 147 T1l = ii[WS(is, 26)];
Chris@82 148 T1m = T1k + T1l;
Chris@82 149 T1M = T1k - T1l;
Chris@82 150 }
Chris@82 151 Tm = Ti + Tl;
Chris@82 152 T1n = T1j + T1m;
Chris@82 153 T1K = T1I - T1J;
Chris@82 154 T1N = T1L + T1M;
Chris@82 155 T1O = FNMS(KP414213562, T1N, T1K);
Chris@82 156 T2Z = FMA(KP414213562, T1K, T1N);
Chris@82 157 {
Chris@82 158 E T3w, T3x, T4u, T4v;
Chris@82 159 T3w = T1J + T1I;
Chris@82 160 T3x = T1L - T1M;
Chris@82 161 T3y = FMA(KP414213562, T3x, T3w);
Chris@82 162 T3X = FNMS(KP414213562, T3w, T3x);
Chris@82 163 T4u = T1j - T1m;
Chris@82 164 T4v = Ti - Tl;
Chris@82 165 T4w = T4u - T4v;
Chris@82 166 T53 = T4v + T4u;
Chris@82 167 }
Chris@82 168 }
/* Input block: elements 30, 14, 6, 22; same structure as the previous block. */
Chris@82 169 {
Chris@82 170 E Tp, T1S, T1q, T1P, Ts, T1Q, T1t, T1T, T1R, T1U;
Chris@82 171 {
Chris@82 172 E Tn, To, T1o, T1p;
Chris@82 173 Tn = ri[WS(is, 30)];
Chris@82 174 To = ri[WS(is, 14)];
Chris@82 175 Tp = Tn + To;
Chris@82 176 T1S = Tn - To;
Chris@82 177 T1o = ii[WS(is, 30)];
Chris@82 178 T1p = ii[WS(is, 14)];
Chris@82 179 T1q = T1o + T1p;
Chris@82 180 T1P = T1o - T1p;
Chris@82 181 }
Chris@82 182 {
Chris@82 183 E Tq, Tr, T1r, T1s;
Chris@82 184 Tq = ri[WS(is, 6)];
Chris@82 185 Tr = ri[WS(is, 22)];
Chris@82 186 Ts = Tq + Tr;
Chris@82 187 T1Q = Tq - Tr;
Chris@82 188 T1r = ii[WS(is, 6)];
Chris@82 189 T1s = ii[WS(is, 22)];
Chris@82 190 T1t = T1r + T1s;
Chris@82 191 T1T = T1r - T1s;
Chris@82 192 }
Chris@82 193 Tt = Tp + Ts;
Chris@82 194 T1u = T1q + T1t;
Chris@82 195 T1R = T1P - T1Q;
Chris@82 196 T1U = T1S + T1T;
Chris@82 197 T1V = FMA(KP414213562, T1U, T1R);
Chris@82 198 T2Y = FNMS(KP414213562, T1R, T1U);
Chris@82 199 {
Chris@82 200 E T3z, T3A, T4x, T4y;
Chris@82 201 T3z = T1Q + T1P;
Chris@82 202 T3A = T1S - T1T;
Chris@82 203 T3B = FNMS(KP414213562, T3A, T3z);
Chris@82 204 T3W = FMA(KP414213562, T3z, T3A);
Chris@82 205 T4x = Tp - Ts;
Chris@82 206 T4y = T1q - T1t;
Chris@82 207 T4z = T4x + T4y;
Chris@82 208 T52 = T4x - T4y;
Chris@82 209 }
Chris@82 210 }
/* Input block: odd elements 31, 15, 7, 23, 3, 19, 27, 11. */
Chris@82 211 {
Chris@82 212 E TN, T2G, T2r, T4N, TQ, T2s, T2J, T4O, TU, T2x, T2w, T4T, TX, T2z, T2C;
Chris@82 213 E T4U;
Chris@82 214 {
Chris@82 215 E TL, TM, T2p, T2q;
Chris@82 216 TL = ri[WS(is, 31)];
Chris@82 217 TM = ri[WS(is, 15)];
Chris@82 218 TN = TL + TM;
Chris@82 219 T2G = TL - TM;
Chris@82 220 T2p = ii[WS(is, 31)];
Chris@82 221 T2q = ii[WS(is, 15)];
Chris@82 222 T2r = T2p - T2q;
Chris@82 223 T4N = T2p + T2q;
Chris@82 224 }
Chris@82 225 {
Chris@82 226 E TO, TP, T2H, T2I;
Chris@82 227 TO = ri[WS(is, 7)];
Chris@82 228 TP = ri[WS(is, 23)];
Chris@82 229 TQ = TO + TP;
Chris@82 230 T2s = TO - TP;
Chris@82 231 T2H = ii[WS(is, 7)];
Chris@82 232 T2I = ii[WS(is, 23)];
Chris@82 233 T2J = T2H - T2I;
Chris@82 234 T4O = T2H + T2I;
Chris@82 235 }
Chris@82 236 {
Chris@82 237 E TS, TT, T2u, T2v;
Chris@82 238 TS = ri[WS(is, 3)];
Chris@82 239 TT = ri[WS(is, 19)];
Chris@82 240 TU = TS + TT;
Chris@82 241 T2x = TS - TT;
Chris@82 242 T2u = ii[WS(is, 3)];
Chris@82 243 T2v = ii[WS(is, 19)];
Chris@82 244 T2w = T2u - T2v;
Chris@82 245 T4T = T2u + T2v;
Chris@82 246 }
Chris@82 247 {
Chris@82 248 E TV, TW, T2A, T2B;
Chris@82 249 TV = ri[WS(is, 27)];
Chris@82 250 TW = ri[WS(is, 11)];
Chris@82 251 TX = TV + TW;
Chris@82 252 T2z = TV - TW;
Chris@82 253 T2A = ii[WS(is, 27)];
Chris@82 254 T2B = ii[WS(is, 11)];
Chris@82 255 T2C = T2A - T2B;
Chris@82 256 T4U = T2A + T2B;
Chris@82 257 }
Chris@82 258 T2t = T2r - T2s;
Chris@82 259 T3L = T2G - T2J;
Chris@82 260 T3O = T2s + T2r;
Chris@82 261 T2K = T2G + T2J;
Chris@82 262 TR = TN + TQ;
Chris@82 263 TY = TU + TX;
Chris@82 264 T5F = TR - TY;
Chris@82 265 {
Chris@82 266 E T4P, T4Q, T2y, T2D;
Chris@82 267 T5G = T4N + T4O;
Chris@82 268 T5H = T4T + T4U;
Chris@82 269 T5I = T5G - T5H;
Chris@82 270 T4P = T4N - T4O;
Chris@82 271 T4Q = TX - TU;
Chris@82 272 T4R = T4P - T4Q;
Chris@82 273 T5k = T4Q + T4P;
Chris@82 274 T2y = T2w - T2x;
Chris@82 275 T2D = T2z + T2C;
Chris@82 276 T2E = T2y + T2D;
Chris@82 277 T3M = T2D - T2y;
Chris@82 278 {
Chris@82 279 E T4S, T4V, T2L, T2M;
Chris@82 280 T4S = TN - TQ;
Chris@82 281 T4V = T4T - T4U;
Chris@82 282 T4W = T4S - T4V;
Chris@82 283 T5j = T4S + T4V;
Chris@82 284 T2L = T2x + T2w;
Chris@82 285 T2M = T2z - T2C;
Chris@82 286 T2N = T2L + T2M;
Chris@82 287 T3P = T2L - T2M;
Chris@82 288 }
Chris@82 289 }
Chris@82 290 }
/* Input block: odd elements 1, 17, 9, 25, 5, 21, 29, 13. */
Chris@82 291 {
Chris@82 292 E Ty, T2f, T20, T4C, TB, T21, T2i, T4D, TF, T26, T25, T4I, TI, T28, T2b;
Chris@82 293 E T4J;
Chris@82 294 {
Chris@82 295 E Tw, Tx, T1Y, T1Z;
Chris@82 296 Tw = ri[WS(is, 1)];
Chris@82 297 Tx = ri[WS(is, 17)];
Chris@82 298 Ty = Tw + Tx;
Chris@82 299 T2f = Tw - Tx;
Chris@82 300 T1Y = ii[WS(is, 1)];
Chris@82 301 T1Z = ii[WS(is, 17)];
Chris@82 302 T20 = T1Y - T1Z;
Chris@82 303 T4C = T1Y + T1Z;
Chris@82 304 }
Chris@82 305 {
Chris@82 306 E Tz, TA, T2g, T2h;
Chris@82 307 Tz = ri[WS(is, 9)];
Chris@82 308 TA = ri[WS(is, 25)];
Chris@82 309 TB = Tz + TA;
Chris@82 310 T21 = Tz - TA;
Chris@82 311 T2g = ii[WS(is, 9)];
Chris@82 312 T2h = ii[WS(is, 25)];
Chris@82 313 T2i = T2g - T2h;
Chris@82 314 T4D = T2g + T2h;
Chris@82 315 }
Chris@82 316 {
Chris@82 317 E TD, TE, T23, T24;
Chris@82 318 TD = ri[WS(is, 5)];
Chris@82 319 TE = ri[WS(is, 21)];
Chris@82 320 TF = TD + TE;
Chris@82 321 T26 = TD - TE;
Chris@82 322 T23 = ii[WS(is, 5)];
Chris@82 323 T24 = ii[WS(is, 21)];
Chris@82 324 T25 = T23 - T24;
Chris@82 325 T4I = T23 + T24;
Chris@82 326 }
Chris@82 327 {
Chris@82 328 E TG, TH, T29, T2a;
Chris@82 329 TG = ri[WS(is, 29)];
Chris@82 330 TH = ri[WS(is, 13)];
Chris@82 331 TI = TG + TH;
Chris@82 332 T28 = TG - TH;
Chris@82 333 T29 = ii[WS(is, 29)];
Chris@82 334 T2a = ii[WS(is, 13)];
Chris@82 335 T2b = T29 - T2a;
Chris@82 336 T4J = T29 + T2a;
Chris@82 337 }
Chris@82 338 T22 = T20 - T21;
Chris@82 339 T3E = T2f - T2i;
Chris@82 340 T3H = T21 + T20;
Chris@82 341 T2j = T2f + T2i;
Chris@82 342 TC = Ty + TB;
Chris@82 343 TJ = TF + TI;
Chris@82 344 T5A = TC - TJ;
Chris@82 345 {
Chris@82 346 E T4E, T4F, T27, T2c;
Chris@82 347 T5B = T4C + T4D;
Chris@82 348 T5C = T4I + T4J;
Chris@82 349 T5D = T5B - T5C;
Chris@82 350 T4E = T4C - T4D;
Chris@82 351 T4F = TI - TF;
Chris@82 352 T4G = T4E - T4F;
Chris@82 353 T5h = T4F + T4E;
Chris@82 354 T27 = T25 - T26;
Chris@82 355 T2c = T28 + T2b;
Chris@82 356 T2d = T27 + T2c;
Chris@82 357 T3F = T2c - T27;
Chris@82 358 {
Chris@82 359 E T4H, T4K, T2k, T2l;
Chris@82 360 T4H = Ty - TB;
Chris@82 361 T4K = T4I - T4J;
Chris@82 362 T4L = T4H - T4K;
Chris@82 363 T5g = T4H + T4K;
Chris@82 364 T2k = T26 + T25;
Chris@82 365 T2l = T28 - T2b;
Chris@82 366 T2m = T2k + T2l;
Chris@82 367 T3I = T2k - T2l;
Chris@82 368 }
Chris@82 369 }
Chris@82 370 }
/* Output block: writes elements 22, 6, 14, 30. */
Chris@82 371 {
Chris@82 372 E T4B, T5b, T5a, T5c, T4Y, T56, T55, T57;
Chris@82 373 {
Chris@82 374 E T4t, T4A, T58, T59;
Chris@82 375 T4t = T4r - T4s;
Chris@82 376 T4A = T4w - T4z;
Chris@82 377 T4B = FMA(KP707106781, T4A, T4t);
Chris@82 378 T5b = FNMS(KP707106781, T4A, T4t);
Chris@82 379 T58 = FMA(KP414213562, T4R, T4W);
Chris@82 380 T59 = FNMS(KP414213562, T4G, T4L);
Chris@82 381 T5a = T58 - T59;
Chris@82 382 T5c = T59 + T58;
Chris@82 383 }
Chris@82 384 {
Chris@82 385 E T4M, T4X, T51, T54;
Chris@82 386 T4M = FMA(KP414213562, T4L, T4G);
Chris@82 387 T4X = FNMS(KP414213562, T4W, T4R);
Chris@82 388 T4Y = T4M - T4X;
Chris@82 389 T56 = T4M + T4X;
Chris@82 390 T51 = T4Z - T50;
Chris@82 391 T54 = T52 - T53;
Chris@82 392 T55 = FNMS(KP707106781, T54, T51);
Chris@82 393 T57 = FMA(KP707106781, T54, T51);
Chris@82 394 }
Chris@82 395 ro[WS(os, 22)] = FNMS(KP923879532, T4Y, T4B);
Chris@82 396 io[WS(os, 22)] = FNMS(KP923879532, T5a, T57);
Chris@82 397 ro[WS(os, 6)] = FMA(KP923879532, T4Y, T4B);
Chris@82 398 io[WS(os, 6)] = FMA(KP923879532, T5a, T57);
Chris@82 399 io[WS(os, 14)] = FNMS(KP923879532, T56, T55);
Chris@82 400 ro[WS(os, 14)] = FNMS(KP923879532, T5c, T5b);
Chris@82 401 io[WS(os, 30)] = FMA(KP923879532, T56, T55);
Chris@82 402 ro[WS(os, 30)] = FMA(KP923879532, T5c, T5b);
Chris@82 403 }
/* Output block: writes elements 18, 2, 26, 10. */
Chris@82 404 {
Chris@82 405 E T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v;
Chris@82 406 {
Chris@82 407 E T5d, T5e, T5s, T5t;
Chris@82 408 T5d = T4r + T4s;
Chris@82 409 T5e = T53 + T52;
Chris@82 410 T5f = FMA(KP707106781, T5e, T5d);
Chris@82 411 T5r = FNMS(KP707106781, T5e, T5d);
Chris@82 412 T5s = FNMS(KP414213562, T5g, T5h);
Chris@82 413 T5t = FMA(KP414213562, T5j, T5k);
Chris@82 414 T5u = T5s - T5t;
Chris@82 415 T5w = T5s + T5t;
Chris@82 416 }
Chris@82 417 {
Chris@82 418 E T5i, T5l, T5n, T5o;
Chris@82 419 T5i = FMA(KP414213562, T5h, T5g);
Chris@82 420 T5l = FNMS(KP414213562, T5k, T5j);
Chris@82 421 T5m = T5i + T5l;
Chris@82 422 T5q = T5l - T5i;
Chris@82 423 T5n = T50 + T4Z;
Chris@82 424 T5o = T4w + T4z;
Chris@82 425 T5p = FNMS(KP707106781, T5o, T5n);
Chris@82 426 T5v = FMA(KP707106781, T5o, T5n);
Chris@82 427 }
Chris@82 428 ro[WS(os, 18)] = FNMS(KP923879532, T5m, T5f);
Chris@82 429 io[WS(os, 18)] = FNMS(KP923879532, T5w, T5v);
Chris@82 430 ro[WS(os, 2)] = FMA(KP923879532, T5m, T5f);
Chris@82 431 io[WS(os, 2)] = FMA(KP923879532, T5w, T5v);
Chris@82 432 io[WS(os, 26)] = FNMS(KP923879532, T5q, T5p);
Chris@82 433 ro[WS(os, 26)] = FNMS(KP923879532, T5u, T5r);
Chris@82 434 io[WS(os, 10)] = FMA(KP923879532, T5q, T5p);
Chris@82 435 ro[WS(os, 10)] = FMA(KP923879532, T5u, T5r);
Chris@82 436 }
/* Output block: writes elements 20, 4, 28, 12. */
Chris@82 437 {
Chris@82 438 E T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T;
Chris@82 439 {
Chris@82 440 E T5x, T5y, T5Q, T5R;
Chris@82 441 T5x = T7 - Te;
Chris@82 442 T5y = T1n - T1u;
Chris@82 443 T5z = T5x + T5y;
Chris@82 444 T5P = T5x - T5y;
Chris@82 445 T5Q = T5D - T5A;
Chris@82 446 T5R = T5F + T5I;
Chris@82 447 T5S = T5Q - T5R;
Chris@82 448 T5U = T5Q + T5R;
Chris@82 449 }
Chris@82 450 {
Chris@82 451 E T5E, T5J, T5L, T5M;
Chris@82 452 T5E = T5A + T5D;
Chris@82 453 T5J = T5F - T5I;
Chris@82 454 T5K = T5E + T5J;
Chris@82 455 T5O = T5J - T5E;
Chris@82 456 T5L = T18 - T1f;
Chris@82 457 T5M = Tt - Tm;
Chris@82 458 T5N = T5L - T5M;
Chris@82 459 T5T = T5M + T5L;
Chris@82 460 }
Chris@82 461 ro[WS(os, 20)] = FNMS(KP707106781, T5K, T5z);
Chris@82 462 io[WS(os, 20)] = FNMS(KP707106781, T5U, T5T);
Chris@82 463 ro[WS(os, 4)] = FMA(KP707106781, T5K, T5z);
Chris@82 464 io[WS(os, 4)] = FMA(KP707106781, T5U, T5T);
Chris@82 465 io[WS(os, 28)] = FNMS(KP707106781, T5O, T5N);
Chris@82 466 ro[WS(os, 28)] = FNMS(KP707106781, T5S, T5P);
Chris@82 467 io[WS(os, 12)] = FMA(KP707106781, T5O, T5N);
Chris@82 468 ro[WS(os, 12)] = FMA(KP707106781, T5S, T5P);
Chris@82 469 }
/* Output block: writes elements 16, 0, 8, 24 using plain sums and differences (no constants). */
Chris@82 470 {
Chris@82 471 E Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z;
Chris@82 472 {
Chris@82 473 E Tf, Tu, T5W, T5X;
Chris@82 474 Tf = T7 + Te;
Chris@82 475 Tu = Tm + Tt;
Chris@82 476 Tv = Tf + Tu;
Chris@82 477 T5V = Tf - Tu;
Chris@82 478 T5W = T5B + T5C;
Chris@82 479 T5X = T5G + T5H;
Chris@82 480 T5Y = T5W - T5X;
Chris@82 481 T60 = T5W + T5X;
Chris@82 482 }
Chris@82 483 {
Chris@82 484 E TK, TZ, T1g, T1v;
Chris@82 485 TK = TC + TJ;
Chris@82 486 TZ = TR + TY;
Chris@82 487 T10 = TK + TZ;
Chris@82 488 T11 = TZ - TK;
Chris@82 489 T1g = T18 + T1f;
Chris@82 490 T1v = T1n + T1u;
Chris@82 491 T1w = T1g - T1v;
Chris@82 492 T5Z = T1g + T1v;
Chris@82 493 }
Chris@82 494 ro[WS(os, 16)] = Tv - T10;
Chris@82 495 io[WS(os, 16)] = T5Z - T60;
Chris@82 496 ro[0] = Tv + T10;
Chris@82 497 io[0] = T5Z + T60;
Chris@82 498 io[WS(os, 8)] = T11 + T1w;
Chris@82 499 ro[WS(os, 8)] = T5V + T5Y;
Chris@82 500 io[WS(os, 24)] = T1w - T11;
Chris@82 501 ro[WS(os, 24)] = T5V - T5Y;
Chris@82 502 }
/* Output block: writes elements 21, 5, 13, 29. */
Chris@82 503 {
Chris@82 504 E T1X, T37, T31, T33, T2o, T35, T2P, T34;
Chris@82 505 {
Chris@82 506 E T1H, T1W, T2X, T30;
Chris@82 507 T1H = FNMS(KP707106781, T1G, T1z);
Chris@82 508 T1W = T1O - T1V;
Chris@82 509 T1X = FMA(KP923879532, T1W, T1H);
Chris@82 510 T37 = FNMS(KP923879532, T1W, T1H);
Chris@82 511 T2X = FNMS(KP707106781, T2W, T2T);
Chris@82 512 T30 = T2Y - T2Z;
Chris@82 513 T31 = FNMS(KP923879532, T30, T2X);
Chris@82 514 T33 = FMA(KP923879532, T30, T2X);
Chris@82 515 }
Chris@82 516 {
Chris@82 517 E T2e, T2n, T2F, T2O;
Chris@82 518 T2e = FNMS(KP707106781, T2d, T22);
Chris@82 519 T2n = FNMS(KP707106781, T2m, T2j);
Chris@82 520 T2o = FMA(KP668178637, T2n, T2e);
Chris@82 521 T35 = FNMS(KP668178637, T2e, T2n);
Chris@82 522 T2F = FNMS(KP707106781, T2E, T2t);
Chris@82 523 T2O = FNMS(KP707106781, T2N, T2K);
Chris@82 524 T2P = FNMS(KP668178637, T2O, T2F);
Chris@82 525 T34 = FMA(KP668178637, T2F, T2O);
Chris@82 526 }
Chris@82 527 {
Chris@82 528 E T2Q, T36, T32, T38;
Chris@82 529 T2Q = T2o - T2P;
Chris@82 530 ro[WS(os, 21)] = FNMS(KP831469612, T2Q, T1X);
Chris@82 531 ro[WS(os, 5)] = FMA(KP831469612, T2Q, T1X);
Chris@82 532 T36 = T34 - T35;
Chris@82 533 io[WS(os, 21)] = FNMS(KP831469612, T36, T33);
Chris@82 534 io[WS(os, 5)] = FMA(KP831469612, T36, T33);
Chris@82 535 T32 = T2o + T2P;
Chris@82 536 io[WS(os, 13)] = FNMS(KP831469612, T32, T31);
Chris@82 537 io[WS(os, 29)] = FMA(KP831469612, T32, T31);
Chris@82 538 T38 = T35 + T34;
Chris@82 539 ro[WS(os, 13)] = FNMS(KP831469612, T38, T37);
Chris@82 540 ro[WS(os, 29)] = FMA(KP831469612, T38, T37);
Chris@82 541 }
Chris@82 542 }
/* Output block: writes elements 19, 3, 27, 11. */
Chris@82 543 {
Chris@82 544 E T3D, T41, T3Z, T45, T3K, T42, T3R, T43;
Chris@82 545 {
Chris@82 546 E T3v, T3C, T3V, T3Y;
Chris@82 547 T3v = FMA(KP707106781, T3u, T3t);
Chris@82 548 T3C = T3y - T3B;
Chris@82 549 T3D = FMA(KP923879532, T3C, T3v);
Chris@82 550 T41 = FNMS(KP923879532, T3C, T3v);
Chris@82 551 T3V = FMA(KP707106781, T3U, T3T);
Chris@82 552 T3Y = T3W - T3X;
Chris@82 553 T3Z = FNMS(KP923879532, T3Y, T3V);
Chris@82 554 T45 = FMA(KP923879532, T3Y, T3V);
Chris@82 555 }
Chris@82 556 {
Chris@82 557 E T3G, T3J, T3N, T3Q;
Chris@82 558 T3G = FNMS(KP707106781, T3F, T3E);
Chris@82 559 T3J = FNMS(KP707106781, T3I, T3H);
Chris@82 560 T3K = FMA(KP668178637, T3J, T3G);
Chris@82 561 T42 = FNMS(KP668178637, T3G, T3J);
Chris@82 562 T3N = FNMS(KP707106781, T3M, T3L);
Chris@82 563 T3Q = FNMS(KP707106781, T3P, T3O);
Chris@82 564 T3R = FNMS(KP668178637, T3Q, T3N);
Chris@82 565 T43 = FMA(KP668178637, T3N, T3Q);
Chris@82 566 }
Chris@82 567 {
Chris@82 568 E T3S, T46, T40, T44;
Chris@82 569 T3S = T3K + T3R;
Chris@82 570 ro[WS(os, 19)] = FNMS(KP831469612, T3S, T3D);
Chris@82 571 ro[WS(os, 3)] = FMA(KP831469612, T3S, T3D);
Chris@82 572 T46 = T42 + T43;
Chris@82 573 io[WS(os, 19)] = FNMS(KP831469612, T46, T45);
Chris@82 574 io[WS(os, 3)] = FMA(KP831469612, T46, T45);
Chris@82 575 T40 = T3R - T3K;
Chris@82 576 io[WS(os, 27)] = FNMS(KP831469612, T40, T3Z);
Chris@82 577 io[WS(os, 11)] = FMA(KP831469612, T40, T3Z);
Chris@82 578 T44 = T42 - T43;
Chris@82 579 ro[WS(os, 27)] = FNMS(KP831469612, T44, T41);
Chris@82 580 ro[WS(os, 11)] = FMA(KP831469612, T44, T41);
Chris@82 581 }
Chris@82 582 }
/* Output block: writes elements 23, 7, 15, 31. */
Chris@82 583 {
Chris@82 584 E T49, T4p, T4j, T4l, T4c, T4n, T4f, T4m;
Chris@82 585 {
Chris@82 586 E T47, T48, T4h, T4i;
Chris@82 587 T47 = FNMS(KP707106781, T3u, T3t);
Chris@82 588 T48 = T3X + T3W;
Chris@82 589 T49 = FNMS(KP923879532, T48, T47);
Chris@82 590 T4p = FMA(KP923879532, T48, T47);
Chris@82 591 T4h = FNMS(KP707106781, T3U, T3T);
Chris@82 592 T4i = T3y + T3B;
Chris@82 593 T4j = FMA(KP923879532, T4i, T4h);
Chris@82 594 T4l = FNMS(KP923879532, T4i, T4h);
Chris@82 595 }
Chris@82 596 {
Chris@82 597 E T4a, T4b, T4d, T4e;
Chris@82 598 T4a = FMA(KP707106781, T3I, T3H);
Chris@82 599 T4b = FMA(KP707106781, T3F, T3E);
Chris@82 600 T4c = FMA(KP198912367, T4b, T4a);
Chris@82 601 T4n = FNMS(KP198912367, T4a, T4b);
Chris@82 602 T4d = FMA(KP707106781, T3P, T3O);
Chris@82 603 T4e = FMA(KP707106781, T3M, T3L);
Chris@82 604 T4f = FNMS(KP198912367, T4e, T4d);
Chris@82 605 T4m = FMA(KP198912367, T4d, T4e);
Chris@82 606 }
Chris@82 607 {
Chris@82 608 E T4g, T4o, T4k, T4q;
Chris@82 609 T4g = T4c - T4f;
Chris@82 610 ro[WS(os, 23)] = FNMS(KP980785280, T4g, T49);
Chris@82 611 ro[WS(os, 7)] = FMA(KP980785280, T4g, T49);
Chris@82 612 T4o = T4m - T4n;
Chris@82 613 io[WS(os, 23)] = FNMS(KP980785280, T4o, T4l);
Chris@82 614 io[WS(os, 7)] = FMA(KP980785280, T4o, T4l);
Chris@82 615 T4k = T4c + T4f;
Chris@82 616 io[WS(os, 15)] = FNMS(KP980785280, T4k, T4j);
Chris@82 617 io[WS(os, 31)] = FMA(KP980785280, T4k, T4j);
Chris@82 618 T4q = T4n + T4m;
Chris@82 619 ro[WS(os, 15)] = FNMS(KP980785280, T4q, T4p);
Chris@82 620 ro[WS(os, 31)] = FMA(KP980785280, T4q, T4p);
Chris@82 621 }
Chris@82 622 }
/* Output block: writes elements 17, 1, 25, 9. */
Chris@82 623 {
Chris@82 624 E T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p;
Chris@82 625 {
Chris@82 626 E T39, T3a, T3j, T3k;
Chris@82 627 T39 = FMA(KP707106781, T1G, T1z);
Chris@82 628 T3a = T2Z + T2Y;
Chris@82 629 T3b = FMA(KP923879532, T3a, T39);
Chris@82 630 T3n = FNMS(KP923879532, T3a, T39);
Chris@82 631 T3j = FMA(KP707106781, T2W, T2T);
Chris@82 632 T3k = T1O + T1V;
Chris@82 633 T3l = FNMS(KP923879532, T3k, T3j);
Chris@82 634 T3r = FMA(KP923879532, T3k, T3j);
Chris@82 635 }
Chris@82 636 {
Chris@82 637 E T3c, T3d, T3f, T3g;
Chris@82 638 T3c = FMA(KP707106781, T2m, T2j);
Chris@82 639 T3d = FMA(KP707106781, T2d, T22);
Chris@82 640 T3e = FMA(KP198912367, T3d, T3c);
Chris@82 641 T3o = FNMS(KP198912367, T3c, T3d);
Chris@82 642 T3f = FMA(KP707106781, T2N, T2K);
Chris@82 643 T3g = FMA(KP707106781, T2E, T2t);
Chris@82 644 T3h = FNMS(KP198912367, T3g, T3f);
Chris@82 645 T3p = FMA(KP198912367, T3f, T3g);
Chris@82 646 }
Chris@82 647 {
Chris@82 648 E T3i, T3s, T3m, T3q;
Chris@82 649 T3i = T3e + T3h;
Chris@82 650 ro[WS(os, 17)] = FNMS(KP980785280, T3i, T3b);
Chris@82 651 ro[WS(os, 1)] = FMA(KP980785280, T3i, T3b);
Chris@82 652 T3s = T3o + T3p;
Chris@82 653 io[WS(os, 17)] = FNMS(KP980785280, T3s, T3r);
Chris@82 654 io[WS(os, 1)] = FMA(KP980785280, T3s, T3r);
Chris@82 655 T3m = T3h - T3e;
Chris@82 656 io[WS(os, 25)] = FNMS(KP980785280, T3m, T3l);
Chris@82 657 io[WS(os, 9)] = FMA(KP980785280, T3m, T3l);
Chris@82 658 T3q = T3o - T3p;
Chris@82 659 ro[WS(os, 25)] = FNMS(KP980785280, T3q, T3n);
Chris@82 660 ro[WS(os, 9)] = FMA(KP980785280, T3q, T3n);
Chris@82 661 }
Chris@82 662 }
Chris@82 663 }
Chris@82 664 }
Chris@82 665 }
Chris@82 666
/*
 * Descriptor handed to X(kdft_register) together with n1_32.  The first three
 * entries of {236, 0, 136, 0} match the generator's summary for the FMA
 * variant (236 additions, 0 multiplications, 136 fused multiply/adds).
 * 32 and "n1_32" are presumably the transform size and codelet name; the
 * remaining fields are defined by kdft_desc, which is declared outside this
 * file -- confirm against its definition.
 */
Chris@82 667 static const kdft_desc desc = { 32, "n1_32", {236, 0, 136, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 668
/*
 * Registration entry point: passes the n1_32 kernel and its descriptor to
 * X(kdft_register) for planner p.  X() is a project macro defined outside
 * this file (presumably a symbol-name prefix).
 */
Chris@82 669 void X(codelet_n1_32) (planner *p) {
Chris@82 670 X(kdft_register) (p, n1_32, &desc);
Chris@82 671 }
Chris@82 672
Chris@82 673 #else
Chris@82 674
Chris@82 675 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 32 -name n1_32 -include dft/scalar/n.h */
Chris@82 676
Chris@82 677 /*
Chris@82 678 * This function contains 372 FP additions, 84 FP multiplications,
Chris@82 679 * (or, 340 additions, 52 multiplications, 32 fused multiply/add),
Chris@82 680 * 100 stack variables, 7 constants, and 128 memory accesses
Chris@82 681 */
Chris@82 682 #include "dft/scalar/n.h"
Chris@82 683
Chris@82 684 static void n1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 685 {
Chris@82 686 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@82 687 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@82 688 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@82 689 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@82 690 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 691 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 692 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 693 {
Chris@82 694 INT i;
Chris@82 695 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) {
Chris@82 696 E T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G;
Chris@82 697 E T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W;
Chris@82 698 E T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5j, T2E;
Chris@82 699 E T3P, T4W, T5k, T2N, T3M, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D;
Chris@82 700 E T4G, T5g, T2d, T3F, T4L, T5h, T2m, T3I;
Chris@82 701 {
Chris@82 702 E T3, T1x, T14, T2S, T6, T2R, T17, T1y;
Chris@82 703 {
Chris@82 704 E T1, T2, T12, T13;
Chris@82 705 T1 = ri[0];
Chris@82 706 T2 = ri[WS(is, 16)];
Chris@82 707 T3 = T1 + T2;
Chris@82 708 T1x = T1 - T2;
Chris@82 709 T12 = ii[0];
Chris@82 710 T13 = ii[WS(is, 16)];
Chris@82 711 T14 = T12 + T13;
Chris@82 712 T2S = T12 - T13;
Chris@82 713 }
Chris@82 714 {
Chris@82 715 E T4, T5, T15, T16;
Chris@82 716 T4 = ri[WS(is, 8)];
Chris@82 717 T5 = ri[WS(is, 24)];
Chris@82 718 T6 = T4 + T5;
Chris@82 719 T2R = T4 - T5;
Chris@82 720 T15 = ii[WS(is, 8)];
Chris@82 721 T16 = ii[WS(is, 24)];
Chris@82 722 T17 = T15 + T16;
Chris@82 723 T1y = T15 - T16;
Chris@82 724 }
Chris@82 725 T7 = T3 + T6;
Chris@82 726 T4r = T3 - T6;
Chris@82 727 T4Z = T14 - T17;
Chris@82 728 T18 = T14 + T17;
Chris@82 729 T1z = T1x - T1y;
Chris@82 730 T3t = T1x + T1y;
Chris@82 731 T3T = T2S - T2R;
Chris@82 732 T2T = T2R + T2S;
Chris@82 733 }
Chris@82 734 {
Chris@82 735 E Ta, T1B, T1b, T1A, Td, T1D, T1e, T1E;
Chris@82 736 {
Chris@82 737 E T8, T9, T19, T1a;
Chris@82 738 T8 = ri[WS(is, 4)];
Chris@82 739 T9 = ri[WS(is, 20)];
Chris@82 740 Ta = T8 + T9;
Chris@82 741 T1B = T8 - T9;
Chris@82 742 T19 = ii[WS(is, 4)];
Chris@82 743 T1a = ii[WS(is, 20)];
Chris@82 744 T1b = T19 + T1a;
Chris@82 745 T1A = T19 - T1a;
Chris@82 746 }
Chris@82 747 {
Chris@82 748 E Tb, Tc, T1c, T1d;
Chris@82 749 Tb = ri[WS(is, 28)];
Chris@82 750 Tc = ri[WS(is, 12)];
Chris@82 751 Td = Tb + Tc;
Chris@82 752 T1D = Tb - Tc;
Chris@82 753 T1c = ii[WS(is, 28)];
Chris@82 754 T1d = ii[WS(is, 12)];
Chris@82 755 T1e = T1c + T1d;
Chris@82 756 T1E = T1c - T1d;
Chris@82 757 }
Chris@82 758 Te = Ta + Td;
Chris@82 759 T1f = T1b + T1e;
Chris@82 760 T50 = Td - Ta;
Chris@82 761 T4s = T1b - T1e;
Chris@82 762 {
Chris@82 763 E T2U, T2V, T1C, T1F;
Chris@82 764 T2U = T1D - T1E;
Chris@82 765 T2V = T1B + T1A;
Chris@82 766 T2W = KP707106781 * (T2U - T2V);
Chris@82 767 T3u = KP707106781 * (T2V + T2U);
Chris@82 768 T1C = T1A - T1B;
Chris@82 769 T1F = T1D + T1E;
Chris@82 770 T1G = KP707106781 * (T1C - T1F);
Chris@82 771 T3U = KP707106781 * (T1C + T1F);
Chris@82 772 }
Chris@82 773 }
Chris@82 774 {
Chris@82 775 E Ti, T1L, T1j, T1J, Tl, T1I, T1m, T1M, T1K, T1N;
Chris@82 776 {
Chris@82 777 E Tg, Th, T1h, T1i;
Chris@82 778 Tg = ri[WS(is, 2)];
Chris@82 779 Th = ri[WS(is, 18)];
Chris@82 780 Ti = Tg + Th;
Chris@82 781 T1L = Tg - Th;
Chris@82 782 T1h = ii[WS(is, 2)];
Chris@82 783 T1i = ii[WS(is, 18)];
Chris@82 784 T1j = T1h + T1i;
Chris@82 785 T1J = T1h - T1i;
Chris@82 786 }
Chris@82 787 {
Chris@82 788 E Tj, Tk, T1k, T1l;
Chris@82 789 Tj = ri[WS(is, 10)];
Chris@82 790 Tk = ri[WS(is, 26)];
Chris@82 791 Tl = Tj + Tk;
Chris@82 792 T1I = Tj - Tk;
Chris@82 793 T1k = ii[WS(is, 10)];
Chris@82 794 T1l = ii[WS(is, 26)];
Chris@82 795 T1m = T1k + T1l;
Chris@82 796 T1M = T1k - T1l;
Chris@82 797 }
Chris@82 798 Tm = Ti + Tl;
Chris@82 799 T1n = T1j + T1m;
Chris@82 800 T1K = T1I + T1J;
Chris@82 801 T1N = T1L - T1M;
Chris@82 802 T1O = FNMS(KP923879532, T1N, KP382683432 * T1K);
Chris@82 803 T2Z = FMA(KP923879532, T1K, KP382683432 * T1N);
Chris@82 804 {
Chris@82 805 E T3w, T3x, T4u, T4v;
Chris@82 806 T3w = T1J - T1I;
Chris@82 807 T3x = T1L + T1M;
Chris@82 808 T3y = FNMS(KP382683432, T3x, KP923879532 * T3w);
Chris@82 809 T3X = FMA(KP382683432, T3w, KP923879532 * T3x);
Chris@82 810 T4u = T1j - T1m;
Chris@82 811 T4v = Ti - Tl;
Chris@82 812 T4w = T4u - T4v;
Chris@82 813 T53 = T4v + T4u;
Chris@82 814 }
Chris@82 815 }
Chris@82 816 {
Chris@82 817 E Tp, T1S, T1q, T1Q, Ts, T1P, T1t, T1T, T1R, T1U;
Chris@82 818 {
Chris@82 819 E Tn, To, T1o, T1p;
Chris@82 820 Tn = ri[WS(is, 30)];
Chris@82 821 To = ri[WS(is, 14)];
Chris@82 822 Tp = Tn + To;
Chris@82 823 T1S = Tn - To;
Chris@82 824 T1o = ii[WS(is, 30)];
Chris@82 825 T1p = ii[WS(is, 14)];
Chris@82 826 T1q = T1o + T1p;
Chris@82 827 T1Q = T1o - T1p;
Chris@82 828 }
Chris@82 829 {
Chris@82 830 E Tq, Tr, T1r, T1s;
Chris@82 831 Tq = ri[WS(is, 6)];
Chris@82 832 Tr = ri[WS(is, 22)];
Chris@82 833 Ts = Tq + Tr;
Chris@82 834 T1P = Tq - Tr;
Chris@82 835 T1r = ii[WS(is, 6)];
Chris@82 836 T1s = ii[WS(is, 22)];
Chris@82 837 T1t = T1r + T1s;
Chris@82 838 T1T = T1r - T1s;
Chris@82 839 }
Chris@82 840 Tt = Tp + Ts;
Chris@82 841 T1u = T1q + T1t;
Chris@82 842 T1R = T1P + T1Q;
Chris@82 843 T1U = T1S - T1T;
Chris@82 844 T1V = FMA(KP382683432, T1R, KP923879532 * T1U);
Chris@82 845 T2Y = FNMS(KP923879532, T1R, KP382683432 * T1U);
Chris@82 846 {
Chris@82 847 E T3z, T3A, T4x, T4y;
Chris@82 848 T3z = T1Q - T1P;
Chris@82 849 T3A = T1S + T1T;
Chris@82 850 T3B = FMA(KP923879532, T3z, KP382683432 * T3A);
Chris@82 851 T3W = FNMS(KP382683432, T3z, KP923879532 * T3A);
Chris@82 852 T4x = Tp - Ts;
Chris@82 853 T4y = T1q - T1t;
Chris@82 854 T4z = T4x + T4y;
Chris@82 855 T52 = T4x - T4y;
Chris@82 856 }
Chris@82 857 }
Chris@82 858 {
Chris@82 859 E TN, T2p, T2J, T4S, TQ, T2G, T2s, T4T, TU, T2x, T2w, T4O, TX, T2z, T2C;
Chris@82 860 E T4P;
Chris@82 861 {
Chris@82 862 E TL, TM, T2H, T2I;
Chris@82 863 TL = ri[WS(is, 31)];
Chris@82 864 TM = ri[WS(is, 15)];
Chris@82 865 TN = TL + TM;
Chris@82 866 T2p = TL - TM;
Chris@82 867 T2H = ii[WS(is, 31)];
Chris@82 868 T2I = ii[WS(is, 15)];
Chris@82 869 T2J = T2H - T2I;
Chris@82 870 T4S = T2H + T2I;
Chris@82 871 }
Chris@82 872 {
Chris@82 873 E TO, TP, T2q, T2r;
Chris@82 874 TO = ri[WS(is, 7)];
Chris@82 875 TP = ri[WS(is, 23)];
Chris@82 876 TQ = TO + TP;
Chris@82 877 T2G = TO - TP;
Chris@82 878 T2q = ii[WS(is, 7)];
Chris@82 879 T2r = ii[WS(is, 23)];
Chris@82 880 T2s = T2q - T2r;
Chris@82 881 T4T = T2q + T2r;
Chris@82 882 }
Chris@82 883 {
Chris@82 884 E TS, TT, T2u, T2v;
Chris@82 885 TS = ri[WS(is, 3)];
Chris@82 886 TT = ri[WS(is, 19)];
Chris@82 887 TU = TS + TT;
Chris@82 888 T2x = TS - TT;
Chris@82 889 T2u = ii[WS(is, 3)];
Chris@82 890 T2v = ii[WS(is, 19)];
Chris@82 891 T2w = T2u - T2v;
Chris@82 892 T4O = T2u + T2v;
Chris@82 893 }
Chris@82 894 {
Chris@82 895 E TV, TW, T2A, T2B;
Chris@82 896 TV = ri[WS(is, 27)];
Chris@82 897 TW = ri[WS(is, 11)];
Chris@82 898 TX = TV + TW;
Chris@82 899 T2z = TV - TW;
Chris@82 900 T2A = ii[WS(is, 27)];
Chris@82 901 T2B = ii[WS(is, 11)];
Chris@82 902 T2C = T2A - T2B;
Chris@82 903 T4P = T2A + T2B;
Chris@82 904 }
Chris@82 905 T2t = T2p - T2s;
Chris@82 906 T3L = T2p + T2s;
Chris@82 907 T3O = T2J - T2G;
Chris@82 908 T2K = T2G + T2J;
Chris@82 909 TR = TN + TQ;
Chris@82 910 TY = TU + TX;
Chris@82 911 T5F = TR - TY;
Chris@82 912 {
Chris@82 913 E T4N, T4Q, T2y, T2D;
Chris@82 914 T5G = T4S + T4T;
Chris@82 915 T5H = T4O + T4P;
Chris@82 916 T5I = T5G - T5H;
Chris@82 917 T4N = TN - TQ;
Chris@82 918 T4Q = T4O - T4P;
Chris@82 919 T4R = T4N - T4Q;
Chris@82 920 T5j = T4N + T4Q;
Chris@82 921 T2y = T2w - T2x;
Chris@82 922 T2D = T2z + T2C;
Chris@82 923 T2E = KP707106781 * (T2y - T2D);
Chris@82 924 T3P = KP707106781 * (T2y + T2D);
Chris@82 925 {
Chris@82 926 E T4U, T4V, T2L, T2M;
Chris@82 927 T4U = T4S - T4T;
Chris@82 928 T4V = TX - TU;
Chris@82 929 T4W = T4U - T4V;
Chris@82 930 T5k = T4V + T4U;
Chris@82 931 T2L = T2z - T2C;
Chris@82 932 T2M = T2x + T2w;
Chris@82 933 T2N = KP707106781 * (T2L - T2M);
Chris@82 934 T3M = KP707106781 * (T2M + T2L);
Chris@82 935 }
Chris@82 936 }
Chris@82 937 }
Chris@82 938 {
Chris@82 939 E Ty, T2f, T21, T4C, TB, T1Y, T2i, T4D, TF, T28, T2b, T4I, TI, T23, T26;
Chris@82 940 E T4J;
Chris@82 941 {
Chris@82 942 E Tw, Tx, T1Z, T20;
Chris@82 943 Tw = ri[WS(is, 1)];
Chris@82 944 Tx = ri[WS(is, 17)];
Chris@82 945 Ty = Tw + Tx;
Chris@82 946 T2f = Tw - Tx;
Chris@82 947 T1Z = ii[WS(is, 1)];
Chris@82 948 T20 = ii[WS(is, 17)];
Chris@82 949 T21 = T1Z - T20;
Chris@82 950 T4C = T1Z + T20;
Chris@82 951 }
Chris@82 952 {
Chris@82 953 E Tz, TA, T2g, T2h;
Chris@82 954 Tz = ri[WS(is, 9)];
Chris@82 955 TA = ri[WS(is, 25)];
Chris@82 956 TB = Tz + TA;
Chris@82 957 T1Y = Tz - TA;
Chris@82 958 T2g = ii[WS(is, 9)];
Chris@82 959 T2h = ii[WS(is, 25)];
Chris@82 960 T2i = T2g - T2h;
Chris@82 961 T4D = T2g + T2h;
Chris@82 962 }
Chris@82 963 {
Chris@82 964 E TD, TE, T29, T2a;
Chris@82 965 TD = ri[WS(is, 5)];
Chris@82 966 TE = ri[WS(is, 21)];
Chris@82 967 TF = TD + TE;
Chris@82 968 T28 = TD - TE;
Chris@82 969 T29 = ii[WS(is, 5)];
Chris@82 970 T2a = ii[WS(is, 21)];
Chris@82 971 T2b = T29 - T2a;
Chris@82 972 T4I = T29 + T2a;
Chris@82 973 }
Chris@82 974 {
Chris@82 975 E TG, TH, T24, T25;
Chris@82 976 TG = ri[WS(is, 29)];
Chris@82 977 TH = ri[WS(is, 13)];
Chris@82 978 TI = TG + TH;
Chris@82 979 T23 = TG - TH;
Chris@82 980 T24 = ii[WS(is, 29)];
Chris@82 981 T25 = ii[WS(is, 13)];
Chris@82 982 T26 = T24 - T25;
Chris@82 983 T4J = T24 + T25;
Chris@82 984 }
Chris@82 985 T22 = T1Y + T21;
Chris@82 986 T3E = T2f + T2i;
Chris@82 987 T3H = T21 - T1Y;
Chris@82 988 T2j = T2f - T2i;
Chris@82 989 TC = Ty + TB;
Chris@82 990 TJ = TF + TI;
Chris@82 991 T5A = TC - TJ;
Chris@82 992 {
Chris@82 993 E T4E, T4F, T27, T2c;
Chris@82 994 T5B = T4C + T4D;
Chris@82 995 T5C = T4I + T4J;
Chris@82 996 T5D = T5B - T5C;
Chris@82 997 T4E = T4C - T4D;
Chris@82 998 T4F = TI - TF;
Chris@82 999 T4G = T4E - T4F;
Chris@82 1000 T5g = T4F + T4E;
Chris@82 1001 T27 = T23 - T26;
Chris@82 1002 T2c = T28 + T2b;
Chris@82 1003 T2d = KP707106781 * (T27 - T2c);
Chris@82 1004 T3F = KP707106781 * (T2c + T27);
Chris@82 1005 {
Chris@82 1006 E T4H, T4K, T2k, T2l;
Chris@82 1007 T4H = Ty - TB;
Chris@82 1008 T4K = T4I - T4J;
Chris@82 1009 T4L = T4H - T4K;
Chris@82 1010 T5h = T4H + T4K;
Chris@82 1011 T2k = T2b - T28;
Chris@82 1012 T2l = T23 + T26;
Chris@82 1013 T2m = KP707106781 * (T2k - T2l);
Chris@82 1014 T3I = KP707106781 * (T2k + T2l);
Chris@82 1015 }
Chris@82 1016 }
Chris@82 1017 }
Chris@82 1018 {
Chris@82 1019 E T4B, T57, T5a, T5c, T4Y, T56, T55, T5b;
Chris@82 1020 {
Chris@82 1021 E T4t, T4A, T58, T59;
Chris@82 1022 T4t = T4r - T4s;
Chris@82 1023 T4A = KP707106781 * (T4w - T4z);
Chris@82 1024 T4B = T4t + T4A;
Chris@82 1025 T57 = T4t - T4A;
Chris@82 1026 T58 = FNMS(KP923879532, T4L, KP382683432 * T4G);
Chris@82 1027 T59 = FMA(KP382683432, T4W, KP923879532 * T4R);
Chris@82 1028 T5a = T58 - T59;
Chris@82 1029 T5c = T58 + T59;
Chris@82 1030 }
Chris@82 1031 {
Chris@82 1032 E T4M, T4X, T51, T54;
Chris@82 1033 T4M = FMA(KP923879532, T4G, KP382683432 * T4L);
Chris@82 1034 T4X = FNMS(KP923879532, T4W, KP382683432 * T4R);
Chris@82 1035 T4Y = T4M + T4X;
Chris@82 1036 T56 = T4X - T4M;
Chris@82 1037 T51 = T4Z - T50;
Chris@82 1038 T54 = KP707106781 * (T52 - T53);
Chris@82 1039 T55 = T51 - T54;
Chris@82 1040 T5b = T51 + T54;
Chris@82 1041 }
Chris@82 1042 ro[WS(os, 22)] = T4B - T4Y;
Chris@82 1043 io[WS(os, 22)] = T5b - T5c;
Chris@82 1044 ro[WS(os, 6)] = T4B + T4Y;
Chris@82 1045 io[WS(os, 6)] = T5b + T5c;
Chris@82 1046 io[WS(os, 30)] = T55 - T56;
Chris@82 1047 ro[WS(os, 30)] = T57 - T5a;
Chris@82 1048 io[WS(os, 14)] = T55 + T56;
Chris@82 1049 ro[WS(os, 14)] = T57 + T5a;
Chris@82 1050 }
Chris@82 1051 {
Chris@82 1052 E T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v;
Chris@82 1053 {
Chris@82 1054 E T5d, T5e, T5s, T5t;
Chris@82 1055 T5d = T4r + T4s;
Chris@82 1056 T5e = KP707106781 * (T53 + T52);
Chris@82 1057 T5f = T5d + T5e;
Chris@82 1058 T5r = T5d - T5e;
Chris@82 1059 T5s = FNMS(KP382683432, T5h, KP923879532 * T5g);
Chris@82 1060 T5t = FMA(KP923879532, T5k, KP382683432 * T5j);
Chris@82 1061 T5u = T5s - T5t;
Chris@82 1062 T5w = T5s + T5t;
Chris@82 1063 }
Chris@82 1064 {
Chris@82 1065 E T5i, T5l, T5n, T5o;
Chris@82 1066 T5i = FMA(KP382683432, T5g, KP923879532 * T5h);
Chris@82 1067 T5l = FNMS(KP382683432, T5k, KP923879532 * T5j);
Chris@82 1068 T5m = T5i + T5l;
Chris@82 1069 T5q = T5l - T5i;
Chris@82 1070 T5n = T50 + T4Z;
Chris@82 1071 T5o = KP707106781 * (T4w + T4z);
Chris@82 1072 T5p = T5n - T5o;
Chris@82 1073 T5v = T5n + T5o;
Chris@82 1074 }
Chris@82 1075 ro[WS(os, 18)] = T5f - T5m;
Chris@82 1076 io[WS(os, 18)] = T5v - T5w;
Chris@82 1077 ro[WS(os, 2)] = T5f + T5m;
Chris@82 1078 io[WS(os, 2)] = T5v + T5w;
Chris@82 1079 io[WS(os, 26)] = T5p - T5q;
Chris@82 1080 ro[WS(os, 26)] = T5r - T5u;
Chris@82 1081 io[WS(os, 10)] = T5p + T5q;
Chris@82 1082 ro[WS(os, 10)] = T5r + T5u;
Chris@82 1083 }
Chris@82 1084 {
Chris@82 1085 E T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T;
Chris@82 1086 {
Chris@82 1087 E T5x, T5y, T5Q, T5R;
Chris@82 1088 T5x = T7 - Te;
Chris@82 1089 T5y = T1n - T1u;
Chris@82 1090 T5z = T5x + T5y;
Chris@82 1091 T5P = T5x - T5y;
Chris@82 1092 T5Q = T5D - T5A;
Chris@82 1093 T5R = T5F + T5I;
Chris@82 1094 T5S = KP707106781 * (T5Q - T5R);
Chris@82 1095 T5U = KP707106781 * (T5Q + T5R);
Chris@82 1096 }
Chris@82 1097 {
Chris@82 1098 E T5E, T5J, T5L, T5M;
Chris@82 1099 T5E = T5A + T5D;
Chris@82 1100 T5J = T5F - T5I;
Chris@82 1101 T5K = KP707106781 * (T5E + T5J);
Chris@82 1102 T5O = KP707106781 * (T5J - T5E);
Chris@82 1103 T5L = T18 - T1f;
Chris@82 1104 T5M = Tt - Tm;
Chris@82 1105 T5N = T5L - T5M;
Chris@82 1106 T5T = T5M + T5L;
Chris@82 1107 }
Chris@82 1108 ro[WS(os, 20)] = T5z - T5K;
Chris@82 1109 io[WS(os, 20)] = T5T - T5U;
Chris@82 1110 ro[WS(os, 4)] = T5z + T5K;
Chris@82 1111 io[WS(os, 4)] = T5T + T5U;
Chris@82 1112 io[WS(os, 28)] = T5N - T5O;
Chris@82 1113 ro[WS(os, 28)] = T5P - T5S;
Chris@82 1114 io[WS(os, 12)] = T5N + T5O;
Chris@82 1115 ro[WS(os, 12)] = T5P + T5S;
Chris@82 1116 }
Chris@82 1117 {
Chris@82 1118 E Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z;
Chris@82 1119 {
Chris@82 1120 E Tf, Tu, T5W, T5X;
Chris@82 1121 Tf = T7 + Te;
Chris@82 1122 Tu = Tm + Tt;
Chris@82 1123 Tv = Tf + Tu;
Chris@82 1124 T5V = Tf - Tu;
Chris@82 1125 T5W = T5B + T5C;
Chris@82 1126 T5X = T5G + T5H;
Chris@82 1127 T5Y = T5W - T5X;
Chris@82 1128 T60 = T5W + T5X;
Chris@82 1129 }
Chris@82 1130 {
Chris@82 1131 E TK, TZ, T1g, T1v;
Chris@82 1132 TK = TC + TJ;
Chris@82 1133 TZ = TR + TY;
Chris@82 1134 T10 = TK + TZ;
Chris@82 1135 T11 = TZ - TK;
Chris@82 1136 T1g = T18 + T1f;
Chris@82 1137 T1v = T1n + T1u;
Chris@82 1138 T1w = T1g - T1v;
Chris@82 1139 T5Z = T1g + T1v;
Chris@82 1140 }
Chris@82 1141 ro[WS(os, 16)] = Tv - T10;
Chris@82 1142 io[WS(os, 16)] = T5Z - T60;
Chris@82 1143 ro[0] = Tv + T10;
Chris@82 1144 io[0] = T5Z + T60;
Chris@82 1145 io[WS(os, 8)] = T11 + T1w;
Chris@82 1146 ro[WS(os, 8)] = T5V + T5Y;
Chris@82 1147 io[WS(os, 24)] = T1w - T11;
Chris@82 1148 ro[WS(os, 24)] = T5V - T5Y;
Chris@82 1149 }
Chris@82 1150 {
Chris@82 1151 E T1X, T33, T31, T37, T2o, T34, T2P, T35;
Chris@82 1152 {
Chris@82 1153 E T1H, T1W, T2X, T30;
Chris@82 1154 T1H = T1z - T1G;
Chris@82 1155 T1W = T1O - T1V;
Chris@82 1156 T1X = T1H + T1W;
Chris@82 1157 T33 = T1H - T1W;
Chris@82 1158 T2X = T2T - T2W;
Chris@82 1159 T30 = T2Y - T2Z;
Chris@82 1160 T31 = T2X - T30;
Chris@82 1161 T37 = T2X + T30;
Chris@82 1162 }
Chris@82 1163 {
Chris@82 1164 E T2e, T2n, T2F, T2O;
Chris@82 1165 T2e = T22 - T2d;
Chris@82 1166 T2n = T2j - T2m;
Chris@82 1167 T2o = FMA(KP980785280, T2e, KP195090322 * T2n);
Chris@82 1168 T34 = FNMS(KP980785280, T2n, KP195090322 * T2e);
Chris@82 1169 T2F = T2t - T2E;
Chris@82 1170 T2O = T2K - T2N;
Chris@82 1171 T2P = FNMS(KP980785280, T2O, KP195090322 * T2F);
Chris@82 1172 T35 = FMA(KP195090322, T2O, KP980785280 * T2F);
Chris@82 1173 }
Chris@82 1174 {
Chris@82 1175 E T2Q, T38, T32, T36;
Chris@82 1176 T2Q = T2o + T2P;
Chris@82 1177 ro[WS(os, 23)] = T1X - T2Q;
Chris@82 1178 ro[WS(os, 7)] = T1X + T2Q;
Chris@82 1179 T38 = T34 + T35;
Chris@82 1180 io[WS(os, 23)] = T37 - T38;
Chris@82 1181 io[WS(os, 7)] = T37 + T38;
Chris@82 1182 T32 = T2P - T2o;
Chris@82 1183 io[WS(os, 31)] = T31 - T32;
Chris@82 1184 io[WS(os, 15)] = T31 + T32;
Chris@82 1185 T36 = T34 - T35;
Chris@82 1186 ro[WS(os, 31)] = T33 - T36;
Chris@82 1187 ro[WS(os, 15)] = T33 + T36;
Chris@82 1188 }
Chris@82 1189 }
Chris@82 1190 {
Chris@82 1191 E T3D, T41, T3Z, T45, T3K, T42, T3R, T43;
Chris@82 1192 {
Chris@82 1193 E T3v, T3C, T3V, T3Y;
Chris@82 1194 T3v = T3t - T3u;
Chris@82 1195 T3C = T3y - T3B;
Chris@82 1196 T3D = T3v + T3C;
Chris@82 1197 T41 = T3v - T3C;
Chris@82 1198 T3V = T3T - T3U;
Chris@82 1199 T3Y = T3W - T3X;
Chris@82 1200 T3Z = T3V - T3Y;
Chris@82 1201 T45 = T3V + T3Y;
Chris@82 1202 }
Chris@82 1203 {
Chris@82 1204 E T3G, T3J, T3N, T3Q;
Chris@82 1205 T3G = T3E - T3F;
Chris@82 1206 T3J = T3H - T3I;
Chris@82 1207 T3K = FMA(KP555570233, T3G, KP831469612 * T3J);
Chris@82 1208 T42 = FNMS(KP831469612, T3G, KP555570233 * T3J);
Chris@82 1209 T3N = T3L - T3M;
Chris@82 1210 T3Q = T3O - T3P;
Chris@82 1211 T3R = FNMS(KP831469612, T3Q, KP555570233 * T3N);
Chris@82 1212 T43 = FMA(KP831469612, T3N, KP555570233 * T3Q);
Chris@82 1213 }
Chris@82 1214 {
Chris@82 1215 E T3S, T46, T40, T44;
Chris@82 1216 T3S = T3K + T3R;
Chris@82 1217 ro[WS(os, 21)] = T3D - T3S;
Chris@82 1218 ro[WS(os, 5)] = T3D + T3S;
Chris@82 1219 T46 = T42 + T43;
Chris@82 1220 io[WS(os, 21)] = T45 - T46;
Chris@82 1221 io[WS(os, 5)] = T45 + T46;
Chris@82 1222 T40 = T3R - T3K;
Chris@82 1223 io[WS(os, 29)] = T3Z - T40;
Chris@82 1224 io[WS(os, 13)] = T3Z + T40;
Chris@82 1225 T44 = T42 - T43;
Chris@82 1226 ro[WS(os, 29)] = T41 - T44;
Chris@82 1227 ro[WS(os, 13)] = T41 + T44;
Chris@82 1228 }
Chris@82 1229 }
Chris@82 1230 {
Chris@82 1231 E T49, T4l, T4j, T4p, T4c, T4m, T4f, T4n;
Chris@82 1232 {
Chris@82 1233 E T47, T48, T4h, T4i;
Chris@82 1234 T47 = T3t + T3u;
Chris@82 1235 T48 = T3X + T3W;
Chris@82 1236 T49 = T47 + T48;
Chris@82 1237 T4l = T47 - T48;
Chris@82 1238 T4h = T3T + T3U;
Chris@82 1239 T4i = T3y + T3B;
Chris@82 1240 T4j = T4h - T4i;
Chris@82 1241 T4p = T4h + T4i;
Chris@82 1242 }
Chris@82 1243 {
Chris@82 1244 E T4a, T4b, T4d, T4e;
Chris@82 1245 T4a = T3E + T3F;
Chris@82 1246 T4b = T3H + T3I;
Chris@82 1247 T4c = FMA(KP980785280, T4a, KP195090322 * T4b);
Chris@82 1248 T4m = FNMS(KP195090322, T4a, KP980785280 * T4b);
Chris@82 1249 T4d = T3L + T3M;
Chris@82 1250 T4e = T3O + T3P;
Chris@82 1251 T4f = FNMS(KP195090322, T4e, KP980785280 * T4d);
Chris@82 1252 T4n = FMA(KP195090322, T4d, KP980785280 * T4e);
Chris@82 1253 }
Chris@82 1254 {
Chris@82 1255 E T4g, T4q, T4k, T4o;
Chris@82 1256 T4g = T4c + T4f;
Chris@82 1257 ro[WS(os, 17)] = T49 - T4g;
Chris@82 1258 ro[WS(os, 1)] = T49 + T4g;
Chris@82 1259 T4q = T4m + T4n;
Chris@82 1260 io[WS(os, 17)] = T4p - T4q;
Chris@82 1261 io[WS(os, 1)] = T4p + T4q;
Chris@82 1262 T4k = T4f - T4c;
Chris@82 1263 io[WS(os, 25)] = T4j - T4k;
Chris@82 1264 io[WS(os, 9)] = T4j + T4k;
Chris@82 1265 T4o = T4m - T4n;
Chris@82 1266 ro[WS(os, 25)] = T4l - T4o;
Chris@82 1267 ro[WS(os, 9)] = T4l + T4o;
Chris@82 1268 }
Chris@82 1269 }
Chris@82 1270 {
Chris@82 1271 E T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p;
Chris@82 1272 {
Chris@82 1273 E T39, T3a, T3j, T3k;
Chris@82 1274 T39 = T1z + T1G;
Chris@82 1275 T3a = T2Z + T2Y;
Chris@82 1276 T3b = T39 + T3a;
Chris@82 1277 T3n = T39 - T3a;
Chris@82 1278 T3j = T2T + T2W;
Chris@82 1279 T3k = T1O + T1V;
Chris@82 1280 T3l = T3j - T3k;
Chris@82 1281 T3r = T3j + T3k;
Chris@82 1282 }
Chris@82 1283 {
Chris@82 1284 E T3c, T3d, T3f, T3g;
Chris@82 1285 T3c = T22 + T2d;
Chris@82 1286 T3d = T2j + T2m;
Chris@82 1287 T3e = FMA(KP555570233, T3c, KP831469612 * T3d);
Chris@82 1288 T3o = FNMS(KP555570233, T3d, KP831469612 * T3c);
Chris@82 1289 T3f = T2t + T2E;
Chris@82 1290 T3g = T2K + T2N;
Chris@82 1291 T3h = FNMS(KP555570233, T3g, KP831469612 * T3f);
Chris@82 1292 T3p = FMA(KP831469612, T3g, KP555570233 * T3f);
Chris@82 1293 }
Chris@82 1294 {
Chris@82 1295 E T3i, T3s, T3m, T3q;
Chris@82 1296 T3i = T3e + T3h;
Chris@82 1297 ro[WS(os, 19)] = T3b - T3i;
Chris@82 1298 ro[WS(os, 3)] = T3b + T3i;
Chris@82 1299 T3s = T3o + T3p;
Chris@82 1300 io[WS(os, 19)] = T3r - T3s;
Chris@82 1301 io[WS(os, 3)] = T3r + T3s;
Chris@82 1302 T3m = T3h - T3e;
Chris@82 1303 io[WS(os, 27)] = T3l - T3m;
Chris@82 1304 io[WS(os, 11)] = T3l + T3m;
Chris@82 1305 T3q = T3o - T3p;
Chris@82 1306 ro[WS(os, 27)] = T3n - T3q;
Chris@82 1307 ro[WS(os, 11)] = T3n + T3q;
Chris@82 1308 }
Chris@82 1309 }
Chris@82 1310 }
Chris@82 1311 }
Chris@82 1312 }
Chris@82 1313
/* Descriptor that is passed, together with the n1_32 kernel, to
 * X(kdft_register) by the registration function in this file.
 * Only the initializer values are visible here; the field layout comes from
 * kdft_desc, which is declared outside this file (presumably reached through
 * dft/codelet-dft.h -- confirm).
 * NOTE(review): 32 is presumably the transform size, "n1_32" the codelet
 * name, and {340, 52, 32, 0} presumably the kernel's operation counts
 * (additions, multiplications, fused multiply-adds, other) -- verify against
 * the kdft_desc declaration.  GENUS is defined elsewhere; the meaning of the
 * four trailing zeros cannot be determined from this file. */
Chris@82 1314 static const kdft_desc desc = { 32, "n1_32", {340, 52, 32, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 1315
/* Registration hook for this codelet: hands the n1_32 kernel function and
 * its descriptor (desc) to the planner `p` through X(kdft_register).
 * Performs no other work and returns nothing.
 * X() is a project macro defined outside this file (presumably the
 * symbol-name prefixing macro -- confirm against the project headers). */
Chris@82 1316 void X(codelet_n1_32) (planner *p) {
Chris@82 1317 X(kdft_register) (p, n1_32, &desc);
Chris@82 1318 }
Chris@82 1319
Chris@82 1320 #endif