annotate src/fftw-3.3.8/dft/scalar/codelets/n1_20.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:12 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 208 FP additions, 72 FP multiplications,
Chris@82 32 * (or, 136 additions, 0 multiplications, 72 fused multiply/add),
Chris@82 33 * 81 stack variables, 4 constants, and 80 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_20, FMA variant: generated size-20 codelet (see the gen_notw command
 * line recorded in the comment before this function: -fma ... -n 20).
 *
 * ri, ii  - input arrays, only read; each iteration reads the 20 elements
 *           ri[WS(is, k)] and ii[WS(is, k)] for k = 0..19
 *           (presumably real and imaginary parts -- confirm against
 *           dft/codelet-dft.h).
 * ro, io  - output arrays; each iteration writes the 20 elements
 *           ro[WS(os, k)] and io[WS(os, k)] for k = 0..19.
 * is, os  - stride arguments passed to WS() for input and output indexing.
 * v       - iteration count; the loop body runs v times (not at all if v <= 0).
 * ivs,ovs - amount added to ri/ii and to ro/io respectively after each iteration.
 *
 * All inputs of an iteration are loaded into temporaries before the first
 * output store of that iteration.
 */
Chris@82 37 static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numerically: 0.9510565... ~ sin(2*pi/5), 0.5590169... ~ sqrt(5)/4,
 * 0.6180339... ~ (sqrt(5)-1)/2, and 1/4. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 42 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 43 {
Chris@82 44 INT i;
/* One pass of the body per count in v; pointers advance by ivs / ovs each pass. */
Chris@82 45 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) {
Chris@82 46 E T7, T2N, T3b, TD, TP, T1R, T2f, T1d, Tt, TA, TB, T2w, T2z, T2P, T35;
Chris@82 47 E T36, T3d, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1T, T29, T2a, T2h, T1h;
Chris@82 48 E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2O, T32, T33, T3c, TE, TF, TG, TU;
Chris@82 49 E TZ, T10, T1D, T1I, T1S, T26, T27, T2g, T1e, T1f, T1g;
/* Inputs 0, 10, 5, 15: pairwise sums and differences. */
Chris@82 50 {
Chris@82 51 E T3, T1N, TN, T2L, T6, TO, T1Q, T2M;
Chris@82 52 {
Chris@82 53 E T1, T2, TL, TM;
Chris@82 54 T1 = ri[0];
Chris@82 55 T2 = ri[WS(is, 10)];
Chris@82 56 T3 = T1 + T2;
Chris@82 57 T1N = T1 - T2;
Chris@82 58 TL = ii[0];
Chris@82 59 TM = ii[WS(is, 10)];
Chris@82 60 TN = TL - TM;
Chris@82 61 T2L = TL + TM;
Chris@82 62 }
Chris@82 63 {
Chris@82 64 E T4, T5, T1O, T1P;
Chris@82 65 T4 = ri[WS(is, 5)];
Chris@82 66 T5 = ri[WS(is, 15)];
Chris@82 67 T6 = T4 + T5;
Chris@82 68 TO = T4 - T5;
Chris@82 69 T1O = ii[WS(is, 5)];
Chris@82 70 T1P = ii[WS(is, 15)];
Chris@82 71 T1Q = T1O - T1P;
Chris@82 72 T2M = T1O + T1P;
Chris@82 73 }
Chris@82 74 T7 = T3 - T6;
Chris@82 75 T2N = T2L - T2M;
Chris@82 76 T3b = T2L + T2M;
Chris@82 77 TD = T3 + T6;
Chris@82 78 TP = TN - TO;
Chris@82 79 T1R = T1N - T1Q;
Chris@82 80 T2f = T1N + T1Q;
Chris@82 81 T1d = TO + TN;
Chris@82 82 }
/* Inputs 8, 18, 13, 3, 12, 2, 17, 7: pairwise sums and differences, then combined. */
Chris@82 83 {
Chris@82 84 E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w;
Chris@82 85 E T2y;
Chris@82 86 {
Chris@82 87 E Tn, To, T11, T12;
Chris@82 88 Tn = ri[WS(is, 8)];
Chris@82 89 To = ri[WS(is, 18)];
Chris@82 90 Tp = Tn + To;
Chris@82 91 T1o = Tn - To;
Chris@82 92 T11 = ii[WS(is, 8)];
Chris@82 93 T12 = ii[WS(is, 18)];
Chris@82 94 T13 = T11 - T12;
Chris@82 95 T2u = T11 + T12;
Chris@82 96 }
Chris@82 97 {
Chris@82 98 E Tq, Tr, T1p, T1q;
Chris@82 99 Tq = ri[WS(is, 13)];
Chris@82 100 Tr = ri[WS(is, 3)];
Chris@82 101 Ts = Tq + Tr;
Chris@82 102 T14 = Tq - Tr;
Chris@82 103 T1p = ii[WS(is, 13)];
Chris@82 104 T1q = ii[WS(is, 3)];
Chris@82 105 T1r = T1p - T1q;
Chris@82 106 T2v = T1p + T1q;
Chris@82 107 }
Chris@82 108 {
Chris@82 109 E Tu, Tv, T16, T17;
Chris@82 110 Tu = ri[WS(is, 12)];
Chris@82 111 Tv = ri[WS(is, 2)];
Chris@82 112 Tw = Tu + Tv;
Chris@82 113 T1t = Tu - Tv;
Chris@82 114 T16 = ii[WS(is, 12)];
Chris@82 115 T17 = ii[WS(is, 2)];
Chris@82 116 T18 = T16 - T17;
Chris@82 117 T2x = T16 + T17;
Chris@82 118 }
Chris@82 119 {
Chris@82 120 E Tx, Ty, T1u, T1v;
Chris@82 121 Tx = ri[WS(is, 17)];
Chris@82 122 Ty = ri[WS(is, 7)];
Chris@82 123 Tz = Tx + Ty;
Chris@82 124 T19 = Tx - Ty;
Chris@82 125 T1u = ii[WS(is, 17)];
Chris@82 126 T1v = ii[WS(is, 7)];
Chris@82 127 T1w = T1u - T1v;
Chris@82 128 T2y = T1u + T1v;
Chris@82 129 }
Chris@82 130 Tt = Tp - Ts;
Chris@82 131 TA = Tw - Tz;
Chris@82 132 TB = Tt + TA;
Chris@82 133 T2w = T2u - T2v;
Chris@82 134 T2z = T2x - T2y;
Chris@82 135 T2P = T2w + T2z;
Chris@82 136 T35 = T2u + T2v;
Chris@82 137 T36 = T2x + T2y;
Chris@82 138 T3d = T35 + T36;
Chris@82 139 TH = Tp + Ts;
Chris@82 140 TI = Tw + Tz;
Chris@82 141 TJ = TH + TI;
Chris@82 142 T15 = T13 - T14;
Chris@82 143 T1a = T18 - T19;
Chris@82 144 T1b = T15 + T1a;
Chris@82 145 T1s = T1o - T1r;
Chris@82 146 T1x = T1t - T1w;
Chris@82 147 T1T = T1s + T1x;
Chris@82 148 T29 = T1o + T1r;
Chris@82 149 T2a = T1t + T1w;
Chris@82 150 T2h = T29 + T2a;
Chris@82 151 T1h = T14 + T13;
Chris@82 152 T1i = T19 + T18;
Chris@82 153 T1j = T1h + T1i;
Chris@82 154 }
/* Inputs 4, 14, 9, 19, 16, 6, 1, 11: pairwise sums and differences, then combined. */
Chris@82 155 {
Chris@82 156 E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H;
Chris@82 157 E T2F;
Chris@82 158 {
Chris@82 159 E T8, T9, TQ, TR;
Chris@82 160 T8 = ri[WS(is, 4)];
Chris@82 161 T9 = ri[WS(is, 14)];
Chris@82 162 Ta = T8 + T9;
Chris@82 163 T1z = T8 - T9;
Chris@82 164 TQ = ii[WS(is, 4)];
Chris@82 165 TR = ii[WS(is, 14)];
Chris@82 166 TS = TQ - TR;
Chris@82 167 T2B = TQ + TR;
Chris@82 168 }
Chris@82 169 {
Chris@82 170 E Tb, Tc, T1A, T1B;
Chris@82 171 Tb = ri[WS(is, 9)];
Chris@82 172 Tc = ri[WS(is, 19)];
Chris@82 173 Td = Tb + Tc;
Chris@82 174 TT = Tb - Tc;
Chris@82 175 T1A = ii[WS(is, 9)];
Chris@82 176 T1B = ii[WS(is, 19)];
Chris@82 177 T1C = T1A - T1B;
Chris@82 178 T2C = T1A + T1B;
Chris@82 179 }
Chris@82 180 {
Chris@82 181 E Tf, Tg, TV, TW;
Chris@82 182 Tf = ri[WS(is, 16)];
Chris@82 183 Tg = ri[WS(is, 6)];
Chris@82 184 Th = Tf + Tg;
Chris@82 185 T1E = Tf - Tg;
Chris@82 186 TV = ii[WS(is, 16)];
Chris@82 187 TW = ii[WS(is, 6)];
Chris@82 188 TX = TV - TW;
Chris@82 189 T2E = TV + TW;
Chris@82 190 }
Chris@82 191 {
Chris@82 192 E Ti, Tj, T1F, T1G;
Chris@82 193 Ti = ri[WS(is, 1)];
Chris@82 194 Tj = ri[WS(is, 11)];
Chris@82 195 Tk = Ti + Tj;
Chris@82 196 TY = Ti - Tj;
Chris@82 197 T1F = ii[WS(is, 1)];
Chris@82 198 T1G = ii[WS(is, 11)];
Chris@82 199 T1H = T1F - T1G;
Chris@82 200 T2F = T1F + T1G;
Chris@82 201 }
Chris@82 202 Te = Ta - Td;
Chris@82 203 Tl = Th - Tk;
Chris@82 204 Tm = Te + Tl;
Chris@82 205 T2D = T2B - T2C;
Chris@82 206 T2G = T2E - T2F;
Chris@82 207 T2O = T2D + T2G;
Chris@82 208 T32 = T2B + T2C;
Chris@82 209 T33 = T2E + T2F;
Chris@82 210 T3c = T32 + T33;
Chris@82 211 TE = Ta + Td;
Chris@82 212 TF = Th + Tk;
Chris@82 213 TG = TE + TF;
Chris@82 214 TU = TS - TT;
Chris@82 215 TZ = TX - TY;
Chris@82 216 T10 = TU + TZ;
Chris@82 217 T1D = T1z - T1C;
Chris@82 218 T1I = T1E - T1H;
Chris@82 219 T1S = T1D + T1I;
Chris@82 220 T26 = T1z + T1C;
Chris@82 221 T27 = T1E + T1H;
Chris@82 222 T2g = T26 + T27;
Chris@82 223 T1e = TT + TS;
Chris@82 224 T1f = TY + TX;
Chris@82 225 T1g = T1e + T1f;
Chris@82 226 }
/* Stores ro at output indices 10, 14, 6, 2, 18. */
Chris@82 227 {
Chris@82 228 E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t;
Chris@82 229 T2s = Tm - TB;
Chris@82 230 TC = Tm + TB;
Chris@82 231 T2r = FNMS(KP250000000, TC, T7);
Chris@82 232 T2A = T2w - T2z;
Chris@82 233 T2H = T2D - T2G;
Chris@82 234 T2I = FNMS(KP618033988, T2H, T2A);
Chris@82 235 T2K = FMA(KP618033988, T2A, T2H);
Chris@82 236 ro[WS(os, 10)] = T7 + TC;
Chris@82 237 T2J = FMA(KP559016994, T2s, T2r);
Chris@82 238 ro[WS(os, 14)] = FNMS(KP951056516, T2K, T2J);
Chris@82 239 ro[WS(os, 6)] = FMA(KP951056516, T2K, T2J);
Chris@82 240 T2t = FNMS(KP559016994, T2s, T2r);
Chris@82 241 ro[WS(os, 2)] = FNMS(KP951056516, T2I, T2t);
Chris@82 242 ro[WS(os, 18)] = FMA(KP951056516, T2I, T2t);
Chris@82 243 }
/* Stores io at output indices 10, 6, 14, 2, 18. */
Chris@82 244 {
Chris@82 245 E T2S, T2Q, T2R, T2W, T2Y, T2U, T2V, T2X, T2T;
Chris@82 246 T2S = T2O - T2P;
Chris@82 247 T2Q = T2O + T2P;
Chris@82 248 T2R = FNMS(KP250000000, T2Q, T2N);
Chris@82 249 T2U = Tt - TA;
Chris@82 250 T2V = Te - Tl;
Chris@82 251 T2W = FNMS(KP618033988, T2V, T2U);
Chris@82 252 T2Y = FMA(KP618033988, T2U, T2V);
Chris@82 253 io[WS(os, 10)] = T2N + T2Q;
Chris@82 254 T2X = FMA(KP559016994, T2S, T2R);
Chris@82 255 io[WS(os, 6)] = FNMS(KP951056516, T2Y, T2X);
Chris@82 256 io[WS(os, 14)] = FMA(KP951056516, T2Y, T2X);
Chris@82 257 T2T = FNMS(KP559016994, T2S, T2R);
Chris@82 258 io[WS(os, 2)] = FMA(KP951056516, T2W, T2T);
Chris@82 259 io[WS(os, 18)] = FNMS(KP951056516, T2W, T2T);
Chris@82 260 }
/* Stores ro at output indices 0, 12, 8, 4, 16. */
Chris@82 261 {
Chris@82 262 E T30, TK, T2Z, T38, T3a, T34, T37, T39, T31;
Chris@82 263 T30 = TG - TJ;
Chris@82 264 TK = TG + TJ;
Chris@82 265 T2Z = FNMS(KP250000000, TK, TD);
Chris@82 266 T34 = T32 - T33;
Chris@82 267 T37 = T35 - T36;
Chris@82 268 T38 = FMA(KP618033988, T37, T34);
Chris@82 269 T3a = FNMS(KP618033988, T34, T37);
Chris@82 270 ro[0] = TD + TK;
Chris@82 271 T39 = FNMS(KP559016994, T30, T2Z);
Chris@82 272 ro[WS(os, 12)] = FNMS(KP951056516, T3a, T39);
Chris@82 273 ro[WS(os, 8)] = FMA(KP951056516, T3a, T39);
Chris@82 274 T31 = FMA(KP559016994, T30, T2Z);
Chris@82 275 ro[WS(os, 4)] = FNMS(KP951056516, T38, T31);
Chris@82 276 ro[WS(os, 16)] = FMA(KP951056516, T38, T31);
Chris@82 277 }
/* Stores io at output indices 0, 8, 12, 4, 16. */
Chris@82 278 {
Chris@82 279 E T3g, T3e, T3f, T3k, T3m, T3i, T3j, T3l, T3h;
Chris@82 280 T3g = T3c - T3d;
Chris@82 281 T3e = T3c + T3d;
Chris@82 282 T3f = FNMS(KP250000000, T3e, T3b);
Chris@82 283 T3i = TE - TF;
Chris@82 284 T3j = TH - TI;
Chris@82 285 T3k = FMA(KP618033988, T3j, T3i);
Chris@82 286 T3m = FNMS(KP618033988, T3i, T3j);
Chris@82 287 io[0] = T3b + T3e;
Chris@82 288 T3l = FNMS(KP559016994, T3g, T3f);
Chris@82 289 io[WS(os, 8)] = FNMS(KP951056516, T3m, T3l);
Chris@82 290 io[WS(os, 12)] = FMA(KP951056516, T3m, T3l);
Chris@82 291 T3h = FMA(KP559016994, T3g, T3f);
Chris@82 292 io[WS(os, 4)] = FMA(KP951056516, T3k, T3h);
Chris@82 293 io[WS(os, 16)] = FNMS(KP951056516, T3k, T3h);
Chris@82 294 }
/* Stores io at output indices 5, 13, 17, 1, 9. */
Chris@82 295 {
Chris@82 296 E T24, T1c, T23, T2c, T2e, T28, T2b, T2d, T25;
Chris@82 297 T24 = T10 - T1b;
Chris@82 298 T1c = T10 + T1b;
Chris@82 299 T23 = FNMS(KP250000000, T1c, TP);
Chris@82 300 T28 = T26 - T27;
Chris@82 301 T2b = T29 - T2a;
Chris@82 302 T2c = FMA(KP618033988, T2b, T28);
Chris@82 303 T2e = FNMS(KP618033988, T28, T2b);
Chris@82 304 io[WS(os, 5)] = TP + T1c;
Chris@82 305 T2d = FNMS(KP559016994, T24, T23);
Chris@82 306 io[WS(os, 13)] = FNMS(KP951056516, T2e, T2d);
Chris@82 307 io[WS(os, 17)] = FMA(KP951056516, T2e, T2d);
Chris@82 308 T25 = FMA(KP559016994, T24, T23);
Chris@82 309 io[WS(os, 1)] = FNMS(KP951056516, T2c, T25);
Chris@82 310 io[WS(os, 9)] = FMA(KP951056516, T2c, T25);
Chris@82 311 }
/* Stores ro at output indices 5, 13, 17, 1, 9. */
Chris@82 312 {
Chris@82 313 E T2k, T2i, T2j, T2o, T2q, T2m, T2n, T2p, T2l;
Chris@82 314 T2k = T2g - T2h;
Chris@82 315 T2i = T2g + T2h;
Chris@82 316 T2j = FNMS(KP250000000, T2i, T2f);
Chris@82 317 T2m = TU - TZ;
Chris@82 318 T2n = T15 - T1a;
Chris@82 319 T2o = FMA(KP618033988, T2n, T2m);
Chris@82 320 T2q = FNMS(KP618033988, T2m, T2n);
Chris@82 321 ro[WS(os, 5)] = T2f + T2i;
Chris@82 322 T2p = FNMS(KP559016994, T2k, T2j);
Chris@82 323 ro[WS(os, 13)] = FMA(KP951056516, T2q, T2p);
Chris@82 324 ro[WS(os, 17)] = FNMS(KP951056516, T2q, T2p);
Chris@82 325 T2l = FMA(KP559016994, T2k, T2j);
Chris@82 326 ro[WS(os, 1)] = FMA(KP951056516, T2o, T2l);
Chris@82 327 ro[WS(os, 9)] = FNMS(KP951056516, T2o, T2l);
Chris@82 328 }
/* Stores io at output indices 15, 11, 19, 3, 7. */
Chris@82 329 {
Chris@82 330 E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n;
Chris@82 331 T1m = T1g - T1j;
Chris@82 332 T1k = T1g + T1j;
Chris@82 333 T1l = FNMS(KP250000000, T1k, T1d);
Chris@82 334 T1y = T1s - T1x;
Chris@82 335 T1J = T1D - T1I;
Chris@82 336 T1K = FNMS(KP618033988, T1J, T1y);
Chris@82 337 T1M = FMA(KP618033988, T1y, T1J);
Chris@82 338 io[WS(os, 15)] = T1d + T1k;
Chris@82 339 T1L = FMA(KP559016994, T1m, T1l);
Chris@82 340 io[WS(os, 11)] = FNMS(KP951056516, T1M, T1L);
Chris@82 341 io[WS(os, 19)] = FMA(KP951056516, T1M, T1L);
Chris@82 342 T1n = FNMS(KP559016994, T1m, T1l);
Chris@82 343 io[WS(os, 3)] = FNMS(KP951056516, T1K, T1n);
Chris@82 344 io[WS(os, 7)] = FMA(KP951056516, T1K, T1n);
Chris@82 345 }
/* Stores ro at output indices 15, 11, 19, 3, 7. */
Chris@82 346 {
Chris@82 347 E T1W, T1U, T1V, T20, T22, T1Y, T1Z, T21, T1X;
Chris@82 348 T1W = T1S - T1T;
Chris@82 349 T1U = T1S + T1T;
Chris@82 350 T1V = FNMS(KP250000000, T1U, T1R);
Chris@82 351 T1Y = T1h - T1i;
Chris@82 352 T1Z = T1e - T1f;
Chris@82 353 T20 = FNMS(KP618033988, T1Z, T1Y);
Chris@82 354 T22 = FMA(KP618033988, T1Y, T1Z);
Chris@82 355 ro[WS(os, 15)] = T1R + T1U;
Chris@82 356 T21 = FMA(KP559016994, T1W, T1V);
Chris@82 357 ro[WS(os, 11)] = FMA(KP951056516, T22, T21);
Chris@82 358 ro[WS(os, 19)] = FNMS(KP951056516, T22, T21);
Chris@82 359 T1X = FNMS(KP559016994, T1W, T1V);
Chris@82 360 ro[WS(os, 3)] = FMA(KP951056516, T20, T1X);
Chris@82 361 ro[WS(os, 7)] = FNMS(KP951056516, T20, T1X);
Chris@82 362 }
Chris@82 363 }
Chris@82 364 }
Chris@82 365 }
Chris@82 366
/* Descriptor handed to the planner for the FMA variant: the value 20, the
 * name "n1_20", and the counts {136, 0, 72, 0}, which match the "136
 * additions, 0 multiplications, 72 fused multiply/add" figures in the
 * generated header comment; remaining fields are &GENUS and zeros
 * (field meanings per kdft_desc -- confirm in dft/codelet-dft.h). */
Chris@82 367 static const kdft_desc desc = { 20, "n1_20", {136, 0, 72, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 368
/* Registration entry point: passes the n1_20 function of this (FMA) variant
 * together with its descriptor to X(kdft_register) on planner p. */
Chris@82 369 void X(codelet_n1_20) (planner *p) {
Chris@82 370 X(kdft_register) (p, n1_20, &desc);
Chris@82 371 }
Chris@82 372
Chris@82 373 #else
Chris@82 374
Chris@82 375 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include dft/scalar/n.h */
Chris@82 376
Chris@82 377 /*
Chris@82 378 * This function contains 208 FP additions, 48 FP multiplications,
Chris@82 379 * (or, 184 additions, 24 multiplications, 24 fused multiply/add),
Chris@82 380 * 81 stack variables, 4 constants, and 80 memory accesses
Chris@82 381 */
Chris@82 382 #include "dft/scalar/n.h"
Chris@82 383
/*
 * n1_20, non-FMA variant: generated size-20 codelet (see the gen_notw command
 * line recorded in the comment before this function: ... -n 20, without -fma).
 * Compiled only when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA
 * is defined (this is the #else branch).
 *
 * ri, ii  - input arrays, only read; each iteration reads the 20 elements
 *           ri[WS(is, k)] and ii[WS(is, k)] for k = 0..19
 *           (presumably real and imaginary parts -- confirm against
 *           dft/codelet-dft.h).
 * ro, io  - output arrays; each iteration writes the 20 elements
 *           ro[WS(os, k)] and io[WS(os, k)] for k = 0..19.
 * is, os  - stride arguments passed to WS() for input and output indexing.
 * v       - iteration count; the loop body runs v times (not at all if v <= 0).
 * ivs,ovs - amount added to ri/ii and to ro/io respectively after each iteration.
 *
 * All inputs of an iteration are loaded into temporaries before the first
 * output store of that iteration.
 */
Chris@82 384 static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 385 {
/* Numerically: 0.5877852... ~ sin(pi/5), 0.9510565... ~ sin(2*pi/5),
 * 1/4, and 0.5590169... ~ sqrt(5)/4. */
Chris@82 386 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 387 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 388 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 389 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 390 {
Chris@82 391 INT i;
/* One pass of the body per count in v; pointers advance by ivs / ovs each pass. */
Chris@82 392 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) {
Chris@82 393 E T7, T2Q, T3h, TD, TP, T1U, T2l, T1d, Tt, TA, TB, T2w, T2z, T2S, T35;
Chris@82 394 E T36, T3f, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1W, T29, T2a, T2j, T1h;
Chris@82 395 E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2R, T32, T33, T3e, TE, TF, TG, TU;
Chris@82 396 E TZ, T10, T1D, T1I, T1V, T26, T27, T2i, T1e, T1f, T1g;
/* Inputs 0, 10, 5, 15: pairwise sums and differences. */
Chris@82 397 {
Chris@82 398 E T3, T1Q, TN, T2O, T6, TO, T1T, T2P;
Chris@82 399 {
Chris@82 400 E T1, T2, TL, TM;
Chris@82 401 T1 = ri[0];
Chris@82 402 T2 = ri[WS(is, 10)];
Chris@82 403 T3 = T1 + T2;
Chris@82 404 T1Q = T1 - T2;
Chris@82 405 TL = ii[0];
Chris@82 406 TM = ii[WS(is, 10)];
Chris@82 407 TN = TL - TM;
Chris@82 408 T2O = TL + TM;
Chris@82 409 }
Chris@82 410 {
Chris@82 411 E T4, T5, T1R, T1S;
Chris@82 412 T4 = ri[WS(is, 5)];
Chris@82 413 T5 = ri[WS(is, 15)];
Chris@82 414 T6 = T4 + T5;
Chris@82 415 TO = T4 - T5;
Chris@82 416 T1R = ii[WS(is, 5)];
Chris@82 417 T1S = ii[WS(is, 15)];
Chris@82 418 T1T = T1R - T1S;
Chris@82 419 T2P = T1R + T1S;
Chris@82 420 }
Chris@82 421 T7 = T3 - T6;
Chris@82 422 T2Q = T2O - T2P;
Chris@82 423 T3h = T2O + T2P;
Chris@82 424 TD = T3 + T6;
Chris@82 425 TP = TN - TO;
Chris@82 426 T1U = T1Q - T1T;
Chris@82 427 T2l = T1Q + T1T;
Chris@82 428 T1d = TO + TN;
Chris@82 429 }
/* Inputs 8, 18, 13, 3, 12, 2, 17, 7: pairwise sums and differences, then combined. */
Chris@82 430 {
Chris@82 431 E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w;
Chris@82 432 E T2y;
Chris@82 433 {
Chris@82 434 E Tn, To, T11, T12;
Chris@82 435 Tn = ri[WS(is, 8)];
Chris@82 436 To = ri[WS(is, 18)];
Chris@82 437 Tp = Tn + To;
Chris@82 438 T1o = Tn - To;
Chris@82 439 T11 = ii[WS(is, 8)];
Chris@82 440 T12 = ii[WS(is, 18)];
Chris@82 441 T13 = T11 - T12;
Chris@82 442 T2u = T11 + T12;
Chris@82 443 }
Chris@82 444 {
Chris@82 445 E Tq, Tr, T1p, T1q;
Chris@82 446 Tq = ri[WS(is, 13)];
Chris@82 447 Tr = ri[WS(is, 3)];
Chris@82 448 Ts = Tq + Tr;
Chris@82 449 T14 = Tq - Tr;
Chris@82 450 T1p = ii[WS(is, 13)];
Chris@82 451 T1q = ii[WS(is, 3)];
Chris@82 452 T1r = T1p - T1q;
Chris@82 453 T2v = T1p + T1q;
Chris@82 454 }
Chris@82 455 {
Chris@82 456 E Tu, Tv, T16, T17;
Chris@82 457 Tu = ri[WS(is, 12)];
Chris@82 458 Tv = ri[WS(is, 2)];
Chris@82 459 Tw = Tu + Tv;
Chris@82 460 T1t = Tu - Tv;
Chris@82 461 T16 = ii[WS(is, 12)];
Chris@82 462 T17 = ii[WS(is, 2)];
Chris@82 463 T18 = T16 - T17;
Chris@82 464 T2x = T16 + T17;
Chris@82 465 }
Chris@82 466 {
Chris@82 467 E Tx, Ty, T1u, T1v;
Chris@82 468 Tx = ri[WS(is, 17)];
Chris@82 469 Ty = ri[WS(is, 7)];
Chris@82 470 Tz = Tx + Ty;
Chris@82 471 T19 = Tx - Ty;
Chris@82 472 T1u = ii[WS(is, 17)];
Chris@82 473 T1v = ii[WS(is, 7)];
Chris@82 474 T1w = T1u - T1v;
Chris@82 475 T2y = T1u + T1v;
Chris@82 476 }
Chris@82 477 Tt = Tp - Ts;
Chris@82 478 TA = Tw - Tz;
Chris@82 479 TB = Tt + TA;
Chris@82 480 T2w = T2u - T2v;
Chris@82 481 T2z = T2x - T2y;
Chris@82 482 T2S = T2w + T2z;
Chris@82 483 T35 = T2u + T2v;
Chris@82 484 T36 = T2x + T2y;
Chris@82 485 T3f = T35 + T36;
Chris@82 486 TH = Tp + Ts;
Chris@82 487 TI = Tw + Tz;
Chris@82 488 TJ = TH + TI;
Chris@82 489 T15 = T13 - T14;
Chris@82 490 T1a = T18 - T19;
Chris@82 491 T1b = T15 + T1a;
Chris@82 492 T1s = T1o - T1r;
Chris@82 493 T1x = T1t - T1w;
Chris@82 494 T1W = T1s + T1x;
Chris@82 495 T29 = T1o + T1r;
Chris@82 496 T2a = T1t + T1w;
Chris@82 497 T2j = T29 + T2a;
Chris@82 498 T1h = T14 + T13;
Chris@82 499 T1i = T19 + T18;
Chris@82 500 T1j = T1h + T1i;
Chris@82 501 }
/* Inputs 4, 14, 9, 19, 16, 6, 1, 11: pairwise sums and differences, then combined. */
Chris@82 502 {
Chris@82 503 E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H;
Chris@82 504 E T2F;
Chris@82 505 {
Chris@82 506 E T8, T9, TQ, TR;
Chris@82 507 T8 = ri[WS(is, 4)];
Chris@82 508 T9 = ri[WS(is, 14)];
Chris@82 509 Ta = T8 + T9;
Chris@82 510 T1z = T8 - T9;
Chris@82 511 TQ = ii[WS(is, 4)];
Chris@82 512 TR = ii[WS(is, 14)];
Chris@82 513 TS = TQ - TR;
Chris@82 514 T2B = TQ + TR;
Chris@82 515 }
Chris@82 516 {
Chris@82 517 E Tb, Tc, T1A, T1B;
Chris@82 518 Tb = ri[WS(is, 9)];
Chris@82 519 Tc = ri[WS(is, 19)];
Chris@82 520 Td = Tb + Tc;
Chris@82 521 TT = Tb - Tc;
Chris@82 522 T1A = ii[WS(is, 9)];
Chris@82 523 T1B = ii[WS(is, 19)];
Chris@82 524 T1C = T1A - T1B;
Chris@82 525 T2C = T1A + T1B;
Chris@82 526 }
Chris@82 527 {
Chris@82 528 E Tf, Tg, TV, TW;
Chris@82 529 Tf = ri[WS(is, 16)];
Chris@82 530 Tg = ri[WS(is, 6)];
Chris@82 531 Th = Tf + Tg;
Chris@82 532 T1E = Tf - Tg;
Chris@82 533 TV = ii[WS(is, 16)];
Chris@82 534 TW = ii[WS(is, 6)];
Chris@82 535 TX = TV - TW;
Chris@82 536 T2E = TV + TW;
Chris@82 537 }
Chris@82 538 {
Chris@82 539 E Ti, Tj, T1F, T1G;
Chris@82 540 Ti = ri[WS(is, 1)];
Chris@82 541 Tj = ri[WS(is, 11)];
Chris@82 542 Tk = Ti + Tj;
Chris@82 543 TY = Ti - Tj;
Chris@82 544 T1F = ii[WS(is, 1)];
Chris@82 545 T1G = ii[WS(is, 11)];
Chris@82 546 T1H = T1F - T1G;
Chris@82 547 T2F = T1F + T1G;
Chris@82 548 }
Chris@82 549 Te = Ta - Td;
Chris@82 550 Tl = Th - Tk;
Chris@82 551 Tm = Te + Tl;
Chris@82 552 T2D = T2B - T2C;
Chris@82 553 T2G = T2E - T2F;
Chris@82 554 T2R = T2D + T2G;
Chris@82 555 T32 = T2B + T2C;
Chris@82 556 T33 = T2E + T2F;
Chris@82 557 T3e = T32 + T33;
Chris@82 558 TE = Ta + Td;
Chris@82 559 TF = Th + Tk;
Chris@82 560 TG = TE + TF;
Chris@82 561 TU = TS - TT;
Chris@82 562 TZ = TX - TY;
Chris@82 563 T10 = TU + TZ;
Chris@82 564 T1D = T1z - T1C;
Chris@82 565 T1I = T1E - T1H;
Chris@82 566 T1V = T1D + T1I;
Chris@82 567 T26 = T1z + T1C;
Chris@82 568 T27 = T1E + T1H;
Chris@82 569 T2i = T26 + T27;
Chris@82 570 T1e = TT + TS;
Chris@82 571 T1f = TY + TX;
Chris@82 572 T1g = T1e + T1f;
Chris@82 573 }
/* Stores ro at output indices 10, 14, 6, 2, 18. */
Chris@82 574 {
Chris@82 575 E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t;
Chris@82 576 T2s = KP559016994 * (Tm - TB);
Chris@82 577 TC = Tm + TB;
Chris@82 578 T2r = FNMS(KP250000000, TC, T7);
Chris@82 579 T2A = T2w - T2z;
Chris@82 580 T2H = T2D - T2G;
Chris@82 581 T2I = FNMS(KP587785252, T2H, KP951056516 * T2A);
Chris@82 582 T2K = FMA(KP951056516, T2H, KP587785252 * T2A);
Chris@82 583 ro[WS(os, 10)] = T7 + TC;
Chris@82 584 T2J = T2s + T2r;
Chris@82 585 ro[WS(os, 14)] = T2J - T2K;
Chris@82 586 ro[WS(os, 6)] = T2J + T2K;
Chris@82 587 T2t = T2r - T2s;
Chris@82 588 ro[WS(os, 2)] = T2t - T2I;
Chris@82 589 ro[WS(os, 18)] = T2t + T2I;
Chris@82 590 }
/* Stores io at output indices 10, 6, 14, 2, 18. */
Chris@82 591 {
Chris@82 592 E T2V, T2T, T2U, T2N, T2Y, T2L, T2M, T2X, T2W;
Chris@82 593 T2V = KP559016994 * (T2R - T2S);
Chris@82 594 T2T = T2R + T2S;
Chris@82 595 T2U = FNMS(KP250000000, T2T, T2Q);
Chris@82 596 T2L = Tt - TA;
Chris@82 597 T2M = Te - Tl;
Chris@82 598 T2N = FNMS(KP587785252, T2M, KP951056516 * T2L);
Chris@82 599 T2Y = FMA(KP951056516, T2M, KP587785252 * T2L);
Chris@82 600 io[WS(os, 10)] = T2Q + T2T;
Chris@82 601 T2X = T2V + T2U;
Chris@82 602 io[WS(os, 6)] = T2X - T2Y;
Chris@82 603 io[WS(os, 14)] = T2Y + T2X;
Chris@82 604 T2W = T2U - T2V;
Chris@82 605 io[WS(os, 2)] = T2N + T2W;
Chris@82 606 io[WS(os, 18)] = T2W - T2N;
Chris@82 607 }
/* Stores ro at output indices 0, 12, 8, 4, 16. */
Chris@82 608 {
Chris@82 609 E T2Z, TK, T30, T38, T3a, T34, T37, T39, T31;
Chris@82 610 T2Z = KP559016994 * (TG - TJ);
Chris@82 611 TK = TG + TJ;
Chris@82 612 T30 = FNMS(KP250000000, TK, TD);
Chris@82 613 T34 = T32 - T33;
Chris@82 614 T37 = T35 - T36;
Chris@82 615 T38 = FMA(KP951056516, T34, KP587785252 * T37);
Chris@82 616 T3a = FNMS(KP587785252, T34, KP951056516 * T37);
Chris@82 617 ro[0] = TD + TK;
Chris@82 618 T39 = T30 - T2Z;
Chris@82 619 ro[WS(os, 12)] = T39 - T3a;
Chris@82 620 ro[WS(os, 8)] = T39 + T3a;
Chris@82 621 T31 = T2Z + T30;
Chris@82 622 ro[WS(os, 4)] = T31 - T38;
Chris@82 623 ro[WS(os, 16)] = T31 + T38;
Chris@82 624 }
/* Stores io at output indices 0, 8, 12, 4, 16. */
Chris@82 625 {
Chris@82 626 E T3g, T3i, T3j, T3d, T3m, T3b, T3c, T3l, T3k;
Chris@82 627 T3g = KP559016994 * (T3e - T3f);
Chris@82 628 T3i = T3e + T3f;
Chris@82 629 T3j = FNMS(KP250000000, T3i, T3h);
Chris@82 630 T3b = TE - TF;
Chris@82 631 T3c = TH - TI;
Chris@82 632 T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
Chris@82 633 T3m = FNMS(KP587785252, T3b, KP951056516 * T3c);
Chris@82 634 io[0] = T3h + T3i;
Chris@82 635 T3l = T3j - T3g;
Chris@82 636 io[WS(os, 8)] = T3l - T3m;
Chris@82 637 io[WS(os, 12)] = T3m + T3l;
Chris@82 638 T3k = T3g + T3j;
Chris@82 639 io[WS(os, 4)] = T3d + T3k;
Chris@82 640 io[WS(os, 16)] = T3k - T3d;
Chris@82 641 }
/* Stores io at output indices 5, 13, 17, 1, 9. */
Chris@82 642 {
Chris@82 643 E T23, T1c, T24, T2c, T2e, T28, T2b, T2d, T25;
Chris@82 644 T23 = KP559016994 * (T10 - T1b);
Chris@82 645 T1c = T10 + T1b;
Chris@82 646 T24 = FNMS(KP250000000, T1c, TP);
Chris@82 647 T28 = T26 - T27;
Chris@82 648 T2b = T29 - T2a;
Chris@82 649 T2c = FMA(KP951056516, T28, KP587785252 * T2b);
Chris@82 650 T2e = FNMS(KP587785252, T28, KP951056516 * T2b);
Chris@82 651 io[WS(os, 5)] = TP + T1c;
Chris@82 652 T2d = T24 - T23;
Chris@82 653 io[WS(os, 13)] = T2d - T2e;
Chris@82 654 io[WS(os, 17)] = T2d + T2e;
Chris@82 655 T25 = T23 + T24;
Chris@82 656 io[WS(os, 1)] = T25 - T2c;
Chris@82 657 io[WS(os, 9)] = T25 + T2c;
Chris@82 658 }
/* Stores ro at output indices 5, 13, 17, 1, 9. */
Chris@82 659 {
Chris@82 660 E T2k, T2m, T2n, T2h, T2p, T2f, T2g, T2q, T2o;
Chris@82 661 T2k = KP559016994 * (T2i - T2j);
Chris@82 662 T2m = T2i + T2j;
Chris@82 663 T2n = FNMS(KP250000000, T2m, T2l);
Chris@82 664 T2f = TU - TZ;
Chris@82 665 T2g = T15 - T1a;
Chris@82 666 T2h = FMA(KP951056516, T2f, KP587785252 * T2g);
Chris@82 667 T2p = FNMS(KP587785252, T2f, KP951056516 * T2g);
Chris@82 668 ro[WS(os, 5)] = T2l + T2m;
Chris@82 669 T2q = T2n - T2k;
Chris@82 670 ro[WS(os, 13)] = T2p + T2q;
Chris@82 671 ro[WS(os, 17)] = T2q - T2p;
Chris@82 672 T2o = T2k + T2n;
Chris@82 673 ro[WS(os, 1)] = T2h + T2o;
Chris@82 674 ro[WS(os, 9)] = T2o - T2h;
Chris@82 675 }
/* Stores io at output indices 15, 11, 19, 3, 7. */
Chris@82 676 {
Chris@82 677 E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n;
Chris@82 678 T1m = KP559016994 * (T1g - T1j);
Chris@82 679 T1k = T1g + T1j;
Chris@82 680 T1l = FNMS(KP250000000, T1k, T1d);
Chris@82 681 T1y = T1s - T1x;
Chris@82 682 T1J = T1D - T1I;
Chris@82 683 T1K = FNMS(KP587785252, T1J, KP951056516 * T1y);
Chris@82 684 T1M = FMA(KP951056516, T1J, KP587785252 * T1y);
Chris@82 685 io[WS(os, 15)] = T1d + T1k;
Chris@82 686 T1L = T1m + T1l;
Chris@82 687 io[WS(os, 11)] = T1L - T1M;
Chris@82 688 io[WS(os, 19)] = T1L + T1M;
Chris@82 689 T1n = T1l - T1m;
Chris@82 690 io[WS(os, 3)] = T1n - T1K;
Chris@82 691 io[WS(os, 7)] = T1n + T1K;
Chris@82 692 }
/* Stores ro at output indices 15, 11, 19, 3, 7. */
Chris@82 693 {
Chris@82 694 E T1Z, T1X, T1Y, T1P, T21, T1N, T1O, T22, T20;
Chris@82 695 T1Z = KP559016994 * (T1V - T1W);
Chris@82 696 T1X = T1V + T1W;
Chris@82 697 T1Y = FNMS(KP250000000, T1X, T1U);
Chris@82 698 T1N = T1h - T1i;
Chris@82 699 T1O = T1e - T1f;
Chris@82 700 T1P = FNMS(KP587785252, T1O, KP951056516 * T1N);
Chris@82 701 T21 = FMA(KP951056516, T1O, KP587785252 * T1N);
Chris@82 702 ro[WS(os, 15)] = T1U + T1X;
Chris@82 703 T22 = T1Z + T1Y;
Chris@82 704 ro[WS(os, 11)] = T21 + T22;
Chris@82 705 ro[WS(os, 19)] = T22 - T21;
Chris@82 706 T20 = T1Y - T1Z;
Chris@82 707 ro[WS(os, 3)] = T1P + T20;
Chris@82 708 ro[WS(os, 7)] = T20 - T1P;
Chris@82 709 }
Chris@82 710 }
Chris@82 711 }
Chris@82 712 }
Chris@82 713
/* Descriptor handed to the planner for the non-FMA variant: the value 20, the
 * name "n1_20", and the counts {184, 24, 24, 0}, which match the "184
 * additions, 24 multiplications, 24 fused multiply/add" figures in the
 * generated header comment; remaining fields are &GENUS and zeros
 * (field meanings per kdft_desc -- confirm in dft/codelet-dft.h). */
Chris@82 714 static const kdft_desc desc = { 20, "n1_20", {184, 24, 24, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 715
/* Registration entry point: passes the n1_20 function of this (non-FMA)
 * variant together with its descriptor to X(kdft_register) on planner p. */
Chris@82 716 void X(codelet_n1_20) (planner *p) {
Chris@82 717 X(kdft_register) (p, n1_20, &desc);
Chris@82 718 }
Chris@82 719
Chris@82 720 #endif