annotate src/fftw-3.3.8/dft/scalar/codelets/n1_16.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:11 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 144 FP additions, 40 FP multiplications,
Chris@82 32 * (or, 104 additions, 0 multiplications, 40 fused multiply/add),
Chris@82 33 * 50 stack variables, 3 constants, and 64 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_16, FMA variant (this branch is compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Each loop pass reads 16 values from ri and 16 from ii at indices
 * WS(is, 0..15) and writes 16 values to ro and 16 to io at indices
 * WS(os, 0..15).  The arithmetic is a straight-line size-16 DFT of the
 * complex points (ri[k], ii[k]).  The sign convention can be checked on
 * output 4, whose real part is
 *   (r0 - r2 + r4 - r6 + ...) + (i1 - i3 + i5 - i7 + ...),
 * i.e. the forward (e^{-2*pi*i*j*k/16}) transform.
 *
 * ri, ii   - real / imaginary input arrays (only read)
 * ro, io   - real / imaginary output arrays
 * is, os   - input / output strides, used only through WS()
 * v        - number of loop passes to perform
 * ivs, ovs - offset added to the input / output pointers after each pass
 *
 * NOTE(review): R, E, stride, INT, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from project headers that are not visible
 * here.  FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c) presumably
 * c - a*b; that reading agrees with the explicit arithmetic in the
 * non-FMA variant of this function in the #else branch -- confirm
 * against the header.
 */
Chris@82 37 static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants: ~cos(pi/8), ~tan(pi/8) (= sqrt(2) - 1), ~sqrt(1/2). */
Chris@82 39 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 40 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@82 41 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 42 {
Chris@82 43 INT i;
/* Counts i down from v to 1; the pointers advance by ivs / ovs per pass. */
Chris@82 44 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) {
/* Intermediates shared between the load stage and the store stage. */
Chris@82 45 E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;
Chris@82 46 E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
Chris@82 47 E T1U, T1A;
/* Load inputs 0, 8, 4, 12 and form their sums and differences. */
Chris@82 48 {
Chris@82 49 E T3, TL, Ty, T1k, T6, T1j, TB, TM;
Chris@82 50 {
Chris@82 51 E T1, T2, Tw, Tx;
Chris@82 52 T1 = ri[0];
Chris@82 53 T2 = ri[WS(is, 8)];
Chris@82 54 T3 = T1 + T2;
Chris@82 55 TL = T1 - T2;
Chris@82 56 Tw = ii[0];
Chris@82 57 Tx = ii[WS(is, 8)];
Chris@82 58 Ty = Tw + Tx;
Chris@82 59 T1k = Tw - Tx;
Chris@82 60 }
Chris@82 61 {
Chris@82 62 E T4, T5, Tz, TA;
Chris@82 63 T4 = ri[WS(is, 4)];
Chris@82 64 T5 = ri[WS(is, 12)];
Chris@82 65 T6 = T4 + T5;
Chris@82 66 T1j = T4 - T5;
Chris@82 67 Tz = ii[WS(is, 4)];
Chris@82 68 TA = ii[WS(is, 12)];
Chris@82 69 TB = Tz + TA;
Chris@82 70 TM = Tz - TA;
Chris@82 71 }
Chris@82 72 T7 = T3 + T6;
Chris@82 73 T1R = T3 - T6;
Chris@82 74 T25 = Ty - TB;
Chris@82 75 TC = Ty + TB;
Chris@82 76 TN = TL - TM;
Chris@82 77 T1x = TL + TM;
Chris@82 78 T1H = T1k - T1j;
Chris@82 79 T1l = T1j + T1k;
Chris@82 80 }
/* Load inputs 15, 7, 3, 11 and form their sums and differences. */
Chris@82 81 {
Chris@82 82 E Tp, T1c, T1a, T20, Ts, T17, T1f, T21;
Chris@82 83 {
Chris@82 84 E Tn, To, T18, T19;
Chris@82 85 Tn = ri[WS(is, 15)];
Chris@82 86 To = ri[WS(is, 7)];
Chris@82 87 Tp = Tn + To;
Chris@82 88 T1c = Tn - To;
Chris@82 89 T18 = ii[WS(is, 15)];
Chris@82 90 T19 = ii[WS(is, 7)];
Chris@82 91 T1a = T18 - T19;
Chris@82 92 T20 = T18 + T19;
Chris@82 93 }
Chris@82 94 {
Chris@82 95 E Tq, Tr, T1d, T1e;
Chris@82 96 Tq = ri[WS(is, 3)];
Chris@82 97 Tr = ri[WS(is, 11)];
Chris@82 98 Ts = Tq + Tr;
Chris@82 99 T17 = Tq - Tr;
Chris@82 100 T1d = ii[WS(is, 3)];
Chris@82 101 T1e = ii[WS(is, 11)];
Chris@82 102 T1f = T1d - T1e;
Chris@82 103 T21 = T1d + T1e;
Chris@82 104 }
Chris@82 105 Tt = Tp + Ts;
Chris@82 106 T22 = T20 - T21;
Chris@82 107 T2h = T20 + T21;
Chris@82 108 T1b = T17 + T1a;
Chris@82 109 T1g = T1c - T1f;
Chris@82 110 T1E = T1a - T17;
Chris@82 111 T1Z = Tp - Ts;
Chris@82 112 T1D = T1c + T1f;
Chris@82 113 }
/* Load inputs 2, 10, 14, 6 and form their sums and differences. */
Chris@82 114 {
Chris@82 115 E Ta, TP, TF, TO, Td, TR, TI, TS;
Chris@82 116 {
Chris@82 117 E T8, T9, TD, TE;
Chris@82 118 T8 = ri[WS(is, 2)];
Chris@82 119 T9 = ri[WS(is, 10)];
Chris@82 120 Ta = T8 + T9;
Chris@82 121 TP = T8 - T9;
Chris@82 122 TD = ii[WS(is, 2)];
Chris@82 123 TE = ii[WS(is, 10)];
Chris@82 124 TF = TD + TE;
Chris@82 125 TO = TD - TE;
Chris@82 126 }
Chris@82 127 {
Chris@82 128 E Tb, Tc, TG, TH;
Chris@82 129 Tb = ri[WS(is, 14)];
Chris@82 130 Tc = ri[WS(is, 6)];
Chris@82 131 Td = Tb + Tc;
Chris@82 132 TR = Tb - Tc;
Chris@82 133 TG = ii[WS(is, 14)];
Chris@82 134 TH = ii[WS(is, 6)];
Chris@82 135 TI = TG + TH;
Chris@82 136 TS = TG - TH;
Chris@82 137 }
Chris@82 138 Te = Ta + Td;
Chris@82 139 T1S = TF - TI;
Chris@82 140 T26 = Td - Ta;
Chris@82 141 TJ = TF + TI;
Chris@82 142 TQ = TO - TP;
Chris@82 143 T1m = TR - TS;
Chris@82 144 T1n = TP + TO;
Chris@82 145 TT = TR + TS;
Chris@82 146 }
/* Load inputs 1, 9, 5, 13 and form their sums and differences. */
Chris@82 147 {
Chris@82 148 E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
Chris@82 149 {
Chris@82 150 E Tg, Th, TX, TY;
Chris@82 151 Tg = ri[WS(is, 1)];
Chris@82 152 Th = ri[WS(is, 9)];
Chris@82 153 Ti = Tg + Th;
Chris@82 154 T11 = Tg - Th;
Chris@82 155 TX = ii[WS(is, 1)];
Chris@82 156 TY = ii[WS(is, 9)];
Chris@82 157 TZ = TX - TY;
Chris@82 158 T1V = TX + TY;
Chris@82 159 }
Chris@82 160 {
Chris@82 161 E Tj, Tk, T12, T13;
Chris@82 162 Tj = ri[WS(is, 5)];
Chris@82 163 Tk = ri[WS(is, 13)];
Chris@82 164 Tl = Tj + Tk;
Chris@82 165 TW = Tj - Tk;
Chris@82 166 T12 = ii[WS(is, 5)];
Chris@82 167 T13 = ii[WS(is, 13)];
Chris@82 168 T14 = T12 - T13;
Chris@82 169 T1W = T12 + T13;
Chris@82 170 }
Chris@82 171 Tm = Ti + Tl;
Chris@82 172 T1X = T1V - T1W;
Chris@82 173 T2g = T1V + T1W;
Chris@82 174 T10 = TW + TZ;
Chris@82 175 T15 = T11 - T14;
Chris@82 176 T1B = TZ - TW;
Chris@82 177 T1U = Ti - Tl;
Chris@82 178 T1A = T11 + T14;
Chris@82 179 }
/* All 32 loads are done; everything below only combines temporaries and stores. */
/* Outputs 0 and 8: plain sums / differences of all inputs, no multiplies. */
Chris@82 180 {
Chris@82 181 E Tf, Tu, T2j, T2k;
Chris@82 182 Tf = T7 + Te;
Chris@82 183 Tu = Tm + Tt;
Chris@82 184 ro[WS(os, 8)] = Tf - Tu;
Chris@82 185 ro[0] = Tf + Tu;
Chris@82 186 T2j = TC + TJ;
Chris@82 187 T2k = T2g + T2h;
Chris@82 188 io[WS(os, 8)] = T2j - T2k;
Chris@82 189 io[0] = T2j + T2k;
Chris@82 190 }
/* Outputs 4 and 12: also multiplication-free. */
Chris@82 191 {
Chris@82 192 E Tv, TK, T2f, T2i;
Chris@82 193 Tv = Tt - Tm;
Chris@82 194 TK = TC - TJ;
Chris@82 195 io[WS(os, 4)] = Tv + TK;
Chris@82 196 io[WS(os, 12)] = TK - Tv;
Chris@82 197 T2f = T7 - Te;
Chris@82 198 T2i = T2g - T2h;
Chris@82 199 ro[WS(os, 12)] = T2f - T2i;
Chris@82 200 ro[WS(os, 4)] = T2f + T2i;
Chris@82 201 }
/* Real parts of outputs 2, 10 and imaginary parts of outputs 6, 14 (scaled by KP707106781). */
Chris@82 202 {
Chris@82 203 E T1T, T27, T24, T28, T1Y, T23;
Chris@82 204 T1T = T1R + T1S;
Chris@82 205 T27 = T25 - T26;
Chris@82 206 T1Y = T1U + T1X;
Chris@82 207 T23 = T1Z - T22;
Chris@82 208 T24 = T1Y + T23;
Chris@82 209 T28 = T23 - T1Y;
Chris@82 210 ro[WS(os, 10)] = FNMS(KP707106781, T24, T1T);
Chris@82 211 io[WS(os, 6)] = FMA(KP707106781, T28, T27);
Chris@82 212 ro[WS(os, 2)] = FMA(KP707106781, T24, T1T);
Chris@82 213 io[WS(os, 14)] = FNMS(KP707106781, T28, T27);
Chris@82 214 }
/* Real parts of outputs 6, 14 and imaginary parts of outputs 2, 10. */
Chris@82 215 {
Chris@82 216 E T29, T2d, T2c, T2e, T2a, T2b;
Chris@82 217 T29 = T1R - T1S;
Chris@82 218 T2d = T26 + T25;
Chris@82 219 T2a = T1X - T1U;
Chris@82 220 T2b = T1Z + T22;
Chris@82 221 T2c = T2a - T2b;
Chris@82 222 T2e = T2a + T2b;
Chris@82 223 ro[WS(os, 14)] = FNMS(KP707106781, T2c, T29);
Chris@82 224 io[WS(os, 2)] = FMA(KP707106781, T2e, T2d);
Chris@82 225 ro[WS(os, 6)] = FMA(KP707106781, T2c, T29);
Chris@82 226 io[WS(os, 10)] = FNMS(KP707106781, T2e, T2d);
Chris@82 227 }
/* Outputs 3, 7, 11, 15: use KP414213562 inside and KP923879532 on the final combine. */
Chris@82 228 {
Chris@82 229 E TV, T1v, T1p, T1r, T1i, T1q, T1u, T1w, TU, T1o;
Chris@82 230 TU = TQ - TT;
Chris@82 231 TV = FMA(KP707106781, TU, TN);
Chris@82 232 T1v = FNMS(KP707106781, TU, TN);
Chris@82 233 T1o = T1m - T1n;
Chris@82 234 T1p = FNMS(KP707106781, T1o, T1l);
Chris@82 235 T1r = FMA(KP707106781, T1o, T1l);
Chris@82 236 {
Chris@82 237 E T16, T1h, T1s, T1t;
Chris@82 238 T16 = FMA(KP414213562, T15, T10);
Chris@82 239 T1h = FNMS(KP414213562, T1g, T1b);
Chris@82 240 T1i = T16 - T1h;
Chris@82 241 T1q = T16 + T1h;
Chris@82 242 T1s = FMA(KP414213562, T1b, T1g);
Chris@82 243 T1t = FNMS(KP414213562, T10, T15);
Chris@82 244 T1u = T1s - T1t;
Chris@82 245 T1w = T1t + T1s;
Chris@82 246 }
Chris@82 247 ro[WS(os, 11)] = FNMS(KP923879532, T1i, TV);
Chris@82 248 io[WS(os, 11)] = FNMS(KP923879532, T1u, T1r);
Chris@82 249 ro[WS(os, 3)] = FMA(KP923879532, T1i, TV);
Chris@82 250 io[WS(os, 3)] = FMA(KP923879532, T1u, T1r);
Chris@82 251 io[WS(os, 7)] = FNMS(KP923879532, T1q, T1p);
Chris@82 252 ro[WS(os, 7)] = FNMS(KP923879532, T1w, T1v);
Chris@82 253 io[WS(os, 15)] = FMA(KP923879532, T1q, T1p);
Chris@82 254 ro[WS(os, 15)] = FMA(KP923879532, T1w, T1v);
Chris@82 255 }
/* Outputs 1, 5, 9, 13: same structure as the block above with the other set of temporaries. */
Chris@82 256 {
Chris@82 257 E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
Chris@82 258 T1y = T1n + T1m;
Chris@82 259 T1z = FMA(KP707106781, T1y, T1x);
Chris@82 260 T1L = FNMS(KP707106781, T1y, T1x);
Chris@82 261 T1I = TQ + TT;
Chris@82 262 T1J = FNMS(KP707106781, T1I, T1H);
Chris@82 263 T1P = FMA(KP707106781, T1I, T1H);
Chris@82 264 {
Chris@82 265 E T1C, T1F, T1M, T1N;
Chris@82 266 T1C = FMA(KP414213562, T1B, T1A);
Chris@82 267 T1F = FNMS(KP414213562, T1E, T1D);
Chris@82 268 T1G = T1C + T1F;
Chris@82 269 T1K = T1F - T1C;
Chris@82 270 T1M = FNMS(KP414213562, T1A, T1B);
Chris@82 271 T1N = FMA(KP414213562, T1D, T1E);
Chris@82 272 T1O = T1M - T1N;
Chris@82 273 T1Q = T1M + T1N;
Chris@82 274 }
Chris@82 275 ro[WS(os, 9)] = FNMS(KP923879532, T1G, T1z);
Chris@82 276 io[WS(os, 9)] = FNMS(KP923879532, T1Q, T1P);
Chris@82 277 ro[WS(os, 1)] = FMA(KP923879532, T1G, T1z);
Chris@82 278 io[WS(os, 1)] = FMA(KP923879532, T1Q, T1P);
Chris@82 279 io[WS(os, 13)] = FNMS(KP923879532, T1K, T1J);
Chris@82 280 ro[WS(os, 13)] = FNMS(KP923879532, T1O, T1L);
Chris@82 281 io[WS(os, 5)] = FMA(KP923879532, T1K, T1J);
Chris@82 282 ro[WS(os, 5)] = FMA(KP923879532, T1O, T1L);
Chris@82 283 }
Chris@82 284 }
Chris@82 285 }
Chris@82 286 }
Chris@82 287
/*
 * Descriptor handed to the planner for the FMA variant.  The leading 16 and
 * the "n1_16" string match the generator's `-n 16 -name n1_16` options, and
 * the {104, 0, 40, 0} group matches the operation counts quoted in the
 * generated header comment (104 additions, 0 multiplications, 40 fused
 * multiply/add).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the four
 * trailing zeros is defined by kdft_desc in the project headers -- not
 * visible here.
 */
Chris@82 288 static const kdft_desc desc = { 16, "n1_16", {104, 0, 40, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 289
/*
 * Registration entry point: passes the FMA-variant n1_16 function and its
 * descriptor to kdft_register for planner p.
 * NOTE(review): X() is a project macro, presumably a symbol-name mangler --
 * confirm against the project headers.
 */
Chris@82 290 void X(codelet_n1_16) (planner *p) {
Chris@82 291 X(kdft_register) (p, n1_16, &desc);
Chris@82 292 }
Chris@82 293
Chris@82 294 #else
Chris@82 295
Chris@82 296 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include dft/scalar/n.h */
Chris@82 297
Chris@82 298 /*
Chris@82 299 * This function contains 144 FP additions, 24 FP multiplications,
Chris@82 300 * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
Chris@82 301 * 50 stack variables, 3 constants, and 64 memory accesses
Chris@82 302 */
Chris@82 303 #include "dft/scalar/n.h"
Chris@82 304
/*
 * n1_16, non-FMA variant (this branch is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Each loop pass reads 16 values from ri and 16 from ii at indices
 * WS(is, 0..15) and writes 16 values to ro and 16 to io at indices
 * WS(os, 0..15).  The arithmetic is a straight-line size-16 DFT of the
 * complex points (ri[k], ii[k]).  The sign convention can be checked on
 * output 4, whose real part is
 *   (r0 - r2 + r4 - r6 + ...) + (i1 - i3 + i5 - i7 + ...),
 * i.e. the forward (e^{-2*pi*i*j*k/16}) transform.
 *
 * ri, ii   - real / imaginary input arrays (only read)
 * ro, io   - real / imaginary output arrays
 * is, os   - input / output strides, used only through WS()
 * v        - number of loop passes to perform
 * ivs, ovs - offset added to the input / output pointers after each pass
 *
 * NOTE(review): R, E, stride, INT, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from project headers that are not visible
 * here.  FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c) presumably
 * c - a*b -- confirm against the header.
 */
Chris@82 305 static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 306 {
/* Numeric constants: ~sin(pi/8), ~cos(pi/8), ~sqrt(1/2). */
Chris@82 307 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@82 308 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@82 309 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 310 {
Chris@82 311 INT i;
/* Counts i down from v to 1; the pointers advance by ivs / ovs per pass. */
Chris@82 312 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) {
/* Intermediates shared between the load stage and the store stage. */
Chris@82 313 E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;
Chris@82 314 E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
Chris@82 315 E T1U, T1A;
/* Load inputs 0, 8, 4, 12 and form their sums and differences. */
Chris@82 316 {
Chris@82 317 E T3, TL, Ty, T1k, T6, T1j, TB, TM;
Chris@82 318 {
Chris@82 319 E T1, T2, Tw, Tx;
Chris@82 320 T1 = ri[0];
Chris@82 321 T2 = ri[WS(is, 8)];
Chris@82 322 T3 = T1 + T2;
Chris@82 323 TL = T1 - T2;
Chris@82 324 Tw = ii[0];
Chris@82 325 Tx = ii[WS(is, 8)];
Chris@82 326 Ty = Tw + Tx;
Chris@82 327 T1k = Tw - Tx;
Chris@82 328 }
Chris@82 329 {
Chris@82 330 E T4, T5, Tz, TA;
Chris@82 331 T4 = ri[WS(is, 4)];
Chris@82 332 T5 = ri[WS(is, 12)];
Chris@82 333 T6 = T4 + T5;
Chris@82 334 T1j = T4 - T5;
Chris@82 335 Tz = ii[WS(is, 4)];
Chris@82 336 TA = ii[WS(is, 12)];
Chris@82 337 TB = Tz + TA;
Chris@82 338 TM = Tz - TA;
Chris@82 339 }
Chris@82 340 T7 = T3 + T6;
Chris@82 341 T1R = T3 - T6;
Chris@82 342 T25 = Ty - TB;
Chris@82 343 TC = Ty + TB;
Chris@82 344 TN = TL - TM;
Chris@82 345 T1x = TL + TM;
Chris@82 346 T1H = T1k - T1j;
Chris@82 347 T1l = T1j + T1k;
Chris@82 348 }
/* Load inputs 15, 7, 3, 11 and form their sums and differences. */
Chris@82 349 {
Chris@82 350 E Tp, T17, T1f, T20, Ts, T1c, T1a, T21;
Chris@82 351 {
Chris@82 352 E Tn, To, T1d, T1e;
Chris@82 353 Tn = ri[WS(is, 15)];
Chris@82 354 To = ri[WS(is, 7)];
Chris@82 355 Tp = Tn + To;
Chris@82 356 T17 = Tn - To;
Chris@82 357 T1d = ii[WS(is, 15)];
Chris@82 358 T1e = ii[WS(is, 7)];
Chris@82 359 T1f = T1d - T1e;
Chris@82 360 T20 = T1d + T1e;
Chris@82 361 }
Chris@82 362 {
Chris@82 363 E Tq, Tr, T18, T19;
Chris@82 364 Tq = ri[WS(is, 3)];
Chris@82 365 Tr = ri[WS(is, 11)];
Chris@82 366 Ts = Tq + Tr;
Chris@82 367 T1c = Tq - Tr;
Chris@82 368 T18 = ii[WS(is, 3)];
Chris@82 369 T19 = ii[WS(is, 11)];
Chris@82 370 T1a = T18 - T19;
Chris@82 371 T21 = T18 + T19;
Chris@82 372 }
Chris@82 373 Tt = Tp + Ts;
Chris@82 374 T22 = T20 - T21;
Chris@82 375 T2h = T20 + T21;
Chris@82 376 T1b = T17 - T1a;
Chris@82 377 T1g = T1c + T1f;
Chris@82 378 T1E = T1f - T1c;
Chris@82 379 T1Z = Tp - Ts;
Chris@82 380 T1D = T17 + T1a;
Chris@82 381 }
/* Load inputs 2, 10, 14, 6 and form their sums and differences. */
Chris@82 382 {
Chris@82 383 E Ta, TP, TF, TO, Td, TR, TI, TS;
Chris@82 384 {
Chris@82 385 E T8, T9, TD, TE;
Chris@82 386 T8 = ri[WS(is, 2)];
Chris@82 387 T9 = ri[WS(is, 10)];
Chris@82 388 Ta = T8 + T9;
Chris@82 389 TP = T8 - T9;
Chris@82 390 TD = ii[WS(is, 2)];
Chris@82 391 TE = ii[WS(is, 10)];
Chris@82 392 TF = TD + TE;
Chris@82 393 TO = TD - TE;
Chris@82 394 }
Chris@82 395 {
Chris@82 396 E Tb, Tc, TG, TH;
Chris@82 397 Tb = ri[WS(is, 14)];
Chris@82 398 Tc = ri[WS(is, 6)];
Chris@82 399 Td = Tb + Tc;
Chris@82 400 TR = Tb - Tc;
Chris@82 401 TG = ii[WS(is, 14)];
Chris@82 402 TH = ii[WS(is, 6)];
Chris@82 403 TI = TG + TH;
Chris@82 404 TS = TG - TH;
Chris@82 405 }
Chris@82 406 Te = Ta + Td;
Chris@82 407 T1S = TF - TI;
Chris@82 408 T26 = Td - Ta;
Chris@82 409 TJ = TF + TI;
Chris@82 410 TQ = TO - TP;
Chris@82 411 T1m = TR - TS;
Chris@82 412 T1n = TP + TO;
Chris@82 413 TT = TR + TS;
Chris@82 414 }
/* Load inputs 1, 9, 5, 13 and form their sums and differences. */
Chris@82 415 {
Chris@82 416 E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
Chris@82 417 {
Chris@82 418 E Tg, Th, TX, TY;
Chris@82 419 Tg = ri[WS(is, 1)];
Chris@82 420 Th = ri[WS(is, 9)];
Chris@82 421 Ti = Tg + Th;
Chris@82 422 T11 = Tg - Th;
Chris@82 423 TX = ii[WS(is, 1)];
Chris@82 424 TY = ii[WS(is, 9)];
Chris@82 425 TZ = TX - TY;
Chris@82 426 T1V = TX + TY;
Chris@82 427 }
Chris@82 428 {
Chris@82 429 E Tj, Tk, T12, T13;
Chris@82 430 Tj = ri[WS(is, 5)];
Chris@82 431 Tk = ri[WS(is, 13)];
Chris@82 432 Tl = Tj + Tk;
Chris@82 433 TW = Tj - Tk;
Chris@82 434 T12 = ii[WS(is, 5)];
Chris@82 435 T13 = ii[WS(is, 13)];
Chris@82 436 T14 = T12 - T13;
Chris@82 437 T1W = T12 + T13;
Chris@82 438 }
Chris@82 439 Tm = Ti + Tl;
Chris@82 440 T1X = T1V - T1W;
Chris@82 441 T2g = T1V + T1W;
Chris@82 442 T10 = TW + TZ;
Chris@82 443 T15 = T11 - T14;
Chris@82 444 T1B = T11 + T14;
Chris@82 445 T1U = Ti - Tl;
Chris@82 446 T1A = TZ - TW;
Chris@82 447 }
/* All 32 loads are done; everything below only combines temporaries and stores. */
/* Outputs 0 and 8: plain sums / differences of all inputs, no multiplies. */
Chris@82 448 {
Chris@82 449 E Tf, Tu, T2j, T2k;
Chris@82 450 Tf = T7 + Te;
Chris@82 451 Tu = Tm + Tt;
Chris@82 452 ro[WS(os, 8)] = Tf - Tu;
Chris@82 453 ro[0] = Tf + Tu;
Chris@82 454 T2j = TC + TJ;
Chris@82 455 T2k = T2g + T2h;
Chris@82 456 io[WS(os, 8)] = T2j - T2k;
Chris@82 457 io[0] = T2j + T2k;
Chris@82 458 }
/* Outputs 4 and 12: also multiplication-free. */
Chris@82 459 {
Chris@82 460 E Tv, TK, T2f, T2i;
Chris@82 461 Tv = Tt - Tm;
Chris@82 462 TK = TC - TJ;
Chris@82 463 io[WS(os, 4)] = Tv + TK;
Chris@82 464 io[WS(os, 12)] = TK - Tv;
Chris@82 465 T2f = T7 - Te;
Chris@82 466 T2i = T2g - T2h;
Chris@82 467 ro[WS(os, 12)] = T2f - T2i;
Chris@82 468 ro[WS(os, 4)] = T2f + T2i;
Chris@82 469 }
/* Real parts of outputs 2, 10 and imaginary parts of outputs 6, 14 (scaled by KP707106781). */
Chris@82 470 {
Chris@82 471 E T1T, T27, T24, T28, T1Y, T23;
Chris@82 472 T1T = T1R + T1S;
Chris@82 473 T27 = T25 - T26;
Chris@82 474 T1Y = T1U + T1X;
Chris@82 475 T23 = T1Z - T22;
Chris@82 476 T24 = KP707106781 * (T1Y + T23);
Chris@82 477 T28 = KP707106781 * (T23 - T1Y);
Chris@82 478 ro[WS(os, 10)] = T1T - T24;
Chris@82 479 io[WS(os, 6)] = T27 + T28;
Chris@82 480 ro[WS(os, 2)] = T1T + T24;
Chris@82 481 io[WS(os, 14)] = T27 - T28;
Chris@82 482 }
/* Real parts of outputs 6, 14 and imaginary parts of outputs 2, 10. */
Chris@82 483 {
Chris@82 484 E T29, T2d, T2c, T2e, T2a, T2b;
Chris@82 485 T29 = T1R - T1S;
Chris@82 486 T2d = T26 + T25;
Chris@82 487 T2a = T1X - T1U;
Chris@82 488 T2b = T1Z + T22;
Chris@82 489 T2c = KP707106781 * (T2a - T2b);
Chris@82 490 T2e = KP707106781 * (T2a + T2b);
Chris@82 491 ro[WS(os, 14)] = T29 - T2c;
Chris@82 492 io[WS(os, 2)] = T2d + T2e;
Chris@82 493 ro[WS(os, 6)] = T29 + T2c;
Chris@82 494 io[WS(os, 10)] = T2d - T2e;
Chris@82 495 }
/* Outputs 3, 7, 11, 15: pairs of temporaries are combined with the KP923879532 / KP382683432 weights. */
Chris@82 496 {
Chris@82 497 E TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o;
Chris@82 498 TU = KP707106781 * (TQ - TT);
Chris@82 499 TV = TN + TU;
Chris@82 500 T1r = TN - TU;
Chris@82 501 T1o = KP707106781 * (T1m - T1n);
Chris@82 502 T1p = T1l - T1o;
Chris@82 503 T1v = T1l + T1o;
Chris@82 504 {
Chris@82 505 E T16, T1h, T1s, T1t;
Chris@82 506 T16 = FMA(KP923879532, T10, KP382683432 * T15);
Chris@82 507 T1h = FNMS(KP923879532, T1g, KP382683432 * T1b);
Chris@82 508 T1i = T16 + T1h;
Chris@82 509 T1q = T1h - T16;
Chris@82 510 T1s = FNMS(KP923879532, T15, KP382683432 * T10);
Chris@82 511 T1t = FMA(KP382683432, T1g, KP923879532 * T1b);
Chris@82 512 T1u = T1s - T1t;
Chris@82 513 T1w = T1s + T1t;
Chris@82 514 }
Chris@82 515 ro[WS(os, 11)] = TV - T1i;
Chris@82 516 io[WS(os, 11)] = T1v - T1w;
Chris@82 517 ro[WS(os, 3)] = TV + T1i;
Chris@82 518 io[WS(os, 3)] = T1v + T1w;
Chris@82 519 io[WS(os, 15)] = T1p - T1q;
Chris@82 520 ro[WS(os, 15)] = T1r - T1u;
Chris@82 521 io[WS(os, 7)] = T1p + T1q;
Chris@82 522 ro[WS(os, 7)] = T1r + T1u;
Chris@82 523 }
/* Outputs 1, 5, 9, 13: same structure as the block above with the other set of temporaries. */
Chris@82 524 {
Chris@82 525 E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
Chris@82 526 T1y = KP707106781 * (T1n + T1m);
Chris@82 527 T1z = T1x + T1y;
Chris@82 528 T1L = T1x - T1y;
Chris@82 529 T1I = KP707106781 * (TQ + TT);
Chris@82 530 T1J = T1H - T1I;
Chris@82 531 T1P = T1H + T1I;
Chris@82 532 {
Chris@82 533 E T1C, T1F, T1M, T1N;
Chris@82 534 T1C = FMA(KP382683432, T1A, KP923879532 * T1B);
Chris@82 535 T1F = FNMS(KP382683432, T1E, KP923879532 * T1D);
Chris@82 536 T1G = T1C + T1F;
Chris@82 537 T1K = T1F - T1C;
Chris@82 538 T1M = FNMS(KP382683432, T1B, KP923879532 * T1A);
Chris@82 539 T1N = FMA(KP923879532, T1E, KP382683432 * T1D);
Chris@82 540 T1O = T1M - T1N;
Chris@82 541 T1Q = T1M + T1N;
Chris@82 542 }
Chris@82 543 ro[WS(os, 9)] = T1z - T1G;
Chris@82 544 io[WS(os, 9)] = T1P - T1Q;
Chris@82 545 ro[WS(os, 1)] = T1z + T1G;
Chris@82 546 io[WS(os, 1)] = T1P + T1Q;
Chris@82 547 io[WS(os, 13)] = T1J - T1K;
Chris@82 548 ro[WS(os, 13)] = T1L - T1O;
Chris@82 549 io[WS(os, 5)] = T1J + T1K;
Chris@82 550 ro[WS(os, 5)] = T1L + T1O;
Chris@82 551 }
Chris@82 552 }
Chris@82 553 }
Chris@82 554 }
Chris@82 555
/*
 * Descriptor handed to the planner for the non-FMA variant.  The leading 16
 * and the "n1_16" string match the generator's `-n 16 -name n1_16` options,
 * and the {136, 16, 8, 0} group matches the operation counts quoted in the
 * generated header comment (136 additions, 16 multiplications, 8 fused
 * multiply/add).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the four
 * trailing zeros is defined by kdft_desc in the project headers -- not
 * visible here.
 */
Chris@82 556 static const kdft_desc desc = { 16, "n1_16", {136, 16, 8, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 557
/*
 * Registration entry point: passes the non-FMA n1_16 function and its
 * descriptor to kdft_register for planner p.
 * NOTE(review): X() is a project macro, presumably a symbol-name mangler --
 * confirm against the project headers.
 */
Chris@82 558 void X(codelet_n1_16) (planner *p) {
Chris@82 559 X(kdft_register) (p, n1_16, &desc);
Chris@82 560 }
Chris@82 561
Chris@82 562 #endif