annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cfdft_16.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:12 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hc2cfdft_16 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 206 FP additions, 132 FP multiplications,
Chris@82 32 * (or, 136 additions, 62 multiplications, 70 fused multiply/add),
Chris@82 33 * 67 stack variables, 4 constants, and 64 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cfdft_16 (FMA variant): genfft-generated size-16 hc2c codelet.
 *
 * For every m in [mb, me) the loop body reads eight elements (indices
 * WS(rs, 0) .. WS(rs, 7)) from each of Rp, Ip, Rm and Im, combines them
 * with the 30 twiddle values W[0..29], and stores 32 results back in place
 * into the same four arrays.
 *
 * Per iteration Rp and Ip advance by +ms, Rm and Im advance by -ms, and W
 * advances by 30. Before the first iteration W is offset by (mb - 1) * 30.
 */
Chris@82 37 static void hc2cfdft_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
Chris@82 40 DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* tan(pi/8) = sqrt(2) - 1 */
Chris@82 41 DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
Chris@82 42 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2, applied to every stored output */
Chris@82 43 {
Chris@82 44 INT m;
/* One pass per m; all pointer and twiddle increments live in the for-header. */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 46 E T1f, T2e, T1c, T2g, T1K, T3D, T2W, T3H, TR, T2j, T2R, T3E, T11, T2l, T1v;
Chris@82 47 E T3G, Ta, T2p, Tk, T2r, T3o, T3p, T1Y, T3z, T2G, T3w, Tv, T2u, TF, T2w;
Chris@82 48 E T3r, T3s, T2b, T3A, T2L, T3x;
/* Load inputs at indices 0 and 4; multiply by W[0..1] and W[14..17]. */
Chris@82 49 {
Chris@82 50 E T1d, T1e, T1I, T16, T1A, T1D, T1E, T1C, T1G, T1H, T2U, T1b, T1z, T2S, T1w;
Chris@82 51 E T1y, T14, T15;
Chris@82 52 T1d = Ip[0];
Chris@82 53 T1e = Im[0];
Chris@82 54 T1I = T1d + T1e;
Chris@82 55 T14 = Ip[WS(rs, 4)];
Chris@82 56 T15 = Im[WS(rs, 4)];
Chris@82 57 T16 = T14 - T15;
Chris@82 58 T1A = T14 + T15;
Chris@82 59 {
Chris@82 60 E T1F, T19, T1a, T1x;
Chris@82 61 T1D = Rm[0];
Chris@82 62 T1E = Rp[0];
Chris@82 63 T1F = T1D - T1E;
Chris@82 64 T1C = W[0];
Chris@82 65 T1G = T1C * T1F;
Chris@82 66 T1H = W[1];
Chris@82 67 T2U = T1H * T1F;
Chris@82 68 T19 = Rp[WS(rs, 4)];
Chris@82 69 T1a = Rm[WS(rs, 4)];
Chris@82 70 T1x = T1a - T19;
Chris@82 71 T1b = T19 + T1a;
Chris@82 72 T1z = W[17];
Chris@82 73 T2S = T1z * T1x;
Chris@82 74 T1w = W[16];
Chris@82 75 T1y = T1w * T1x;
Chris@82 76 }
Chris@82 77 T1f = T1d - T1e;
Chris@82 78 T2e = T1E + T1D;
Chris@82 79 {
Chris@82 80 E T17, T2f, T13, T18;
Chris@82 81 T13 = W[14];
Chris@82 82 T17 = T13 * T16;
Chris@82 83 T2f = T13 * T1b;
Chris@82 84 T18 = W[15];
Chris@82 85 T1c = FNMS(T18, T1b, T17);
Chris@82 86 T2g = FMA(T18, T16, T2f);
Chris@82 87 }
Chris@82 88 {
Chris@82 89 E T1B, T1J, T2T, T2V;
Chris@82 90 T1B = FNMS(T1z, T1A, T1y);
Chris@82 91 T1J = FNMS(T1H, T1I, T1G);
Chris@82 92 T1K = T1B + T1J;
Chris@82 93 T3D = T1J - T1B;
Chris@82 94 T2T = FMA(T1w, T1A, T2S);
Chris@82 95 T2V = FMA(T1C, T1I, T2U);
Chris@82 96 T2W = T2T + T2V;
Chris@82 97 T3H = T2V - T2T;
Chris@82 98 }
Chris@82 99 }
/* Load inputs at indices 2 and 6; multiply by W[6..9] and W[22..25]. */
Chris@82 100 {
Chris@82 101 E TL, T1n, TQ, T1m, T2N, T1j, T1l, TV, T1t, T10, T1s, T2P, T1p, T1r;
Chris@82 102 {
Chris@82 103 E TJ, TK, TO, TP, T1k;
Chris@82 104 TJ = Ip[WS(rs, 2)];
Chris@82 105 TK = Im[WS(rs, 2)];
Chris@82 106 TL = TJ - TK;
Chris@82 107 T1n = TJ + TK;
Chris@82 108 TO = Rp[WS(rs, 2)];
Chris@82 109 TP = Rm[WS(rs, 2)];
Chris@82 110 T1k = TP - TO;
Chris@82 111 TQ = TO + TP;
Chris@82 112 T1m = W[9];
Chris@82 113 T2N = T1m * T1k;
Chris@82 114 T1j = W[8];
Chris@82 115 T1l = T1j * T1k;
Chris@82 116 }
Chris@82 117 {
Chris@82 118 E TT, TU, TY, TZ, T1q;
Chris@82 119 TT = Ip[WS(rs, 6)];
Chris@82 120 TU = Im[WS(rs, 6)];
Chris@82 121 TV = TT - TU;
Chris@82 122 T1t = TT + TU;
Chris@82 123 TY = Rp[WS(rs, 6)];
Chris@82 124 TZ = Rm[WS(rs, 6)];
Chris@82 125 T1q = TZ - TY;
Chris@82 126 T10 = TY + TZ;
Chris@82 127 T1s = W[25];
Chris@82 128 T2P = T1s * T1q;
Chris@82 129 T1p = W[24];
Chris@82 130 T1r = T1p * T1q;
Chris@82 131 }
Chris@82 132 {
Chris@82 133 E T2O, T2Q, T1o, T1u;
Chris@82 134 {
Chris@82 135 E TM, T2i, TI, TN;
Chris@82 136 TI = W[6];
Chris@82 137 TM = TI * TL;
Chris@82 138 T2i = TI * TQ;
Chris@82 139 TN = W[7];
Chris@82 140 TR = FNMS(TN, TQ, TM);
Chris@82 141 T2j = FMA(TN, TL, T2i);
Chris@82 142 }
Chris@82 143 T2O = FMA(T1j, T1n, T2N);
Chris@82 144 T2Q = FMA(T1p, T1t, T2P);
Chris@82 145 T2R = T2O + T2Q;
Chris@82 146 T3E = T2O - T2Q;
Chris@82 147 {
Chris@82 148 E TW, T2k, TS, TX;
Chris@82 149 TS = W[22];
Chris@82 150 TW = TS * TV;
Chris@82 151 T2k = TS * T10;
Chris@82 152 TX = W[23];
Chris@82 153 T11 = FNMS(TX, T10, TW);
Chris@82 154 T2l = FMA(TX, TV, T2k);
Chris@82 155 }
Chris@82 156 T1o = FNMS(T1m, T1n, T1l);
Chris@82 157 T1u = FNMS(T1s, T1t, T1r);
Chris@82 158 T1v = T1o + T1u;
Chris@82 159 T3G = T1o - T1u;
Chris@82 160 }
Chris@82 161 }
/* Load inputs at indices 1 and 5; multiply by W[2..5] and W[18..21]. */
Chris@82 162 {
Chris@82 163 E T4, T1Q, T9, T1N, T5, T2o, T1O, T2C, Te, T1W, Tj, T1T, Tf, T2q, T1U;
Chris@82 164 E T2E, T6, Tg;
Chris@82 165 {
Chris@82 166 E T1, T1M, Tb, T1S;
Chris@82 167 {
Chris@82 168 E T2, T3, T7, T8;
Chris@82 169 T2 = Ip[WS(rs, 1)];
Chris@82 170 T3 = Im[WS(rs, 1)];
Chris@82 171 T4 = T2 - T3;
Chris@82 172 T1Q = T2 + T3;
Chris@82 173 T7 = Rp[WS(rs, 1)];
Chris@82 174 T8 = Rm[WS(rs, 1)];
Chris@82 175 T9 = T7 + T8;
Chris@82 176 T1N = T7 - T8;
Chris@82 177 }
Chris@82 178 T1 = W[2];
Chris@82 179 T5 = T1 * T4;
Chris@82 180 T2o = T1 * T9;
Chris@82 181 T1M = W[4];
Chris@82 182 T1O = T1M * T1N;
Chris@82 183 T2C = T1M * T1Q;
Chris@82 184 {
Chris@82 185 E Tc, Td, Th, Ti;
Chris@82 186 Tc = Ip[WS(rs, 5)];
Chris@82 187 Td = Im[WS(rs, 5)];
Chris@82 188 Te = Tc - Td;
Chris@82 189 T1W = Tc + Td;
Chris@82 190 Th = Rp[WS(rs, 5)];
Chris@82 191 Ti = Rm[WS(rs, 5)];
Chris@82 192 Tj = Th + Ti;
Chris@82 193 T1T = Th - Ti;
Chris@82 194 }
Chris@82 195 Tb = W[18];
Chris@82 196 Tf = Tb * Te;
Chris@82 197 T2q = Tb * Tj;
Chris@82 198 T1S = W[20];
Chris@82 199 T1U = T1S * T1T;
Chris@82 200 T2E = T1S * T1W;
Chris@82 201 }
Chris@82 202 T6 = W[3];
Chris@82 203 Ta = FNMS(T6, T9, T5);
Chris@82 204 T2p = FMA(T6, T4, T2o);
Chris@82 205 Tg = W[19];
Chris@82 206 Tk = FNMS(Tg, Tj, Tf);
Chris@82 207 T2r = FMA(Tg, Te, T2q);
Chris@82 208 T3o = Ta - Tk;
Chris@82 209 T3p = T2p - T2r;
Chris@82 210 {
Chris@82 211 E T1R, T2D, T1X, T2F, T1P, T1V;
Chris@82 212 T1P = W[5];
Chris@82 213 T1R = FMA(T1P, T1Q, T1O);
Chris@82 214 T2D = FNMS(T1P, T1N, T2C);
Chris@82 215 T1V = W[21];
Chris@82 216 T1X = FMA(T1V, T1W, T1U);
Chris@82 217 T2F = FNMS(T1V, T1T, T2E);
Chris@82 218 T1Y = T1R + T1X;
Chris@82 219 T3z = T1X - T1R;
Chris@82 220 T2G = T2D + T2F;
Chris@82 221 T3w = T2F - T2D;
Chris@82 222 }
Chris@82 223 }
/* Load inputs at indices 7 and 3; multiply by W[26..29] and W[10..13]. */
Chris@82 224 {
Chris@82 225 E Tp, T23, Tu, T20, Tq, T2t, T21, T2H, Tz, T29, TE, T26, TA, T2v, T27;
Chris@82 226 E T2J, Tr, TB;
Chris@82 227 {
Chris@82 228 E Tm, T1Z, Tw, T25;
Chris@82 229 {
Chris@82 230 E Tn, To, Ts, Tt;
Chris@82 231 Tn = Ip[WS(rs, 7)];
Chris@82 232 To = Im[WS(rs, 7)];
Chris@82 233 Tp = Tn - To;
Chris@82 234 T23 = Tn + To;
Chris@82 235 Ts = Rp[WS(rs, 7)];
Chris@82 236 Tt = Rm[WS(rs, 7)];
Chris@82 237 Tu = Ts + Tt;
Chris@82 238 T20 = Ts - Tt;
Chris@82 239 }
Chris@82 240 Tm = W[26];
Chris@82 241 Tq = Tm * Tp;
Chris@82 242 T2t = Tm * Tu;
Chris@82 243 T1Z = W[28];
Chris@82 244 T21 = T1Z * T20;
Chris@82 245 T2H = T1Z * T23;
Chris@82 246 {
Chris@82 247 E Tx, Ty, TC, TD;
Chris@82 248 Tx = Ip[WS(rs, 3)];
Chris@82 249 Ty = Im[WS(rs, 3)];
Chris@82 250 Tz = Tx - Ty;
Chris@82 251 T29 = Tx + Ty;
Chris@82 252 TC = Rp[WS(rs, 3)];
Chris@82 253 TD = Rm[WS(rs, 3)];
Chris@82 254 TE = TC + TD;
Chris@82 255 T26 = TC - TD;
Chris@82 256 }
Chris@82 257 Tw = W[10];
Chris@82 258 TA = Tw * Tz;
Chris@82 259 T2v = Tw * TE;
Chris@82 260 T25 = W[12];
Chris@82 261 T27 = T25 * T26;
Chris@82 262 T2J = T25 * T29;
Chris@82 263 }
Chris@82 264 Tr = W[27];
Chris@82 265 Tv = FNMS(Tr, Tu, Tq);
Chris@82 266 T2u = FMA(Tr, Tp, T2t);
Chris@82 267 TB = W[11];
Chris@82 268 TF = FNMS(TB, TE, TA);
Chris@82 269 T2w = FMA(TB, Tz, T2v);
Chris@82 270 T3r = T2u - T2w;
Chris@82 271 T3s = Tv - TF;
Chris@82 272 {
Chris@82 273 E T24, T2I, T2a, T2K, T22, T28;
Chris@82 274 T22 = W[29];
Chris@82 275 T24 = FMA(T22, T23, T21);
Chris@82 276 T2I = FNMS(T22, T20, T2H);
Chris@82 277 T28 = W[13];
Chris@82 278 T2a = FMA(T28, T29, T27);
Chris@82 279 T2K = FNMS(T28, T26, T2J);
Chris@82 280 T2b = T24 + T2a;
Chris@82 281 T3A = T2I - T2K;
Chris@82 282 T2L = T2I + T2K;
Chris@82 283 T3x = T2a - T24;
Chris@82 284 }
Chris@82 285 }
/* Combine and store: Rp/Ip at indices 0, 2, 4, 6 and Rm/Im at indices 1, 3, 5, 7. */
Chris@82 286 {
Chris@82 287 E TH, T3c, T36, T3g, T39, T3h, T1h, T32, T2d, T2A, T2y, T31, T2Y, T30, T2n;
Chris@82 288 E T3b;
Chris@82 289 {
Chris@82 290 E Tl, TG, T34, T35;
Chris@82 291 Tl = Ta + Tk;
Chris@82 292 TG = Tv + TF;
Chris@82 293 TH = Tl + TG;
Chris@82 294 T3c = Tl - TG;
Chris@82 295 T34 = T2L - T2G;
Chris@82 296 T35 = T1Y - T2b;
Chris@82 297 T36 = T34 + T35;
Chris@82 298 T3g = T34 - T35;
Chris@82 299 }
Chris@82 300 {
Chris@82 301 E T37, T38, T12, T1g;
Chris@82 302 T37 = T1K - T1v;
Chris@82 303 T38 = T2W - T2R;
Chris@82 304 T39 = T37 - T38;
Chris@82 305 T3h = T37 + T38;
Chris@82 306 T12 = TR + T11;
Chris@82 307 T1g = T1c + T1f;
Chris@82 308 T1h = T12 + T1g;
Chris@82 309 T32 = T1g - T12;
Chris@82 310 }
Chris@82 311 {
Chris@82 312 E T1L, T2c, T2s, T2x;
Chris@82 313 T1L = T1v + T1K;
Chris@82 314 T2c = T1Y + T2b;
Chris@82 315 T2d = T1L - T2c;
Chris@82 316 T2A = T2c + T1L;
Chris@82 317 T2s = T2p + T2r;
Chris@82 318 T2x = T2u + T2w;
Chris@82 319 T2y = T2s + T2x;
Chris@82 320 T31 = T2x - T2s;
Chris@82 321 }
Chris@82 322 {
Chris@82 323 E T2M, T2X, T2h, T2m;
Chris@82 324 T2M = T2G + T2L;
Chris@82 325 T2X = T2R + T2W;
Chris@82 326 T2Y = T2M - T2X;
Chris@82 327 T30 = T2M + T2X;
Chris@82 328 T2h = T2e + T2g;
Chris@82 329 T2m = T2j + T2l;
Chris@82 330 T2n = T2h + T2m;
Chris@82 331 T3b = T2h - T2m;
Chris@82 332 }
Chris@82 333 {
Chris@82 334 E T1i, T2Z, T2z, T2B;
Chris@82 335 T1i = TH + T1h;
Chris@82 336 Ip[0] = KP500000000 * (T1i + T2d);
Chris@82 337 Im[WS(rs, 7)] = KP500000000 * (T2d - T1i);
Chris@82 338 T2Z = T2n + T2y;
Chris@82 339 Rm[WS(rs, 7)] = KP500000000 * (T2Z - T30);
Chris@82 340 Rp[0] = KP500000000 * (T2Z + T30);
Chris@82 341 T2z = T2n - T2y;
Chris@82 342 Rm[WS(rs, 3)] = KP500000000 * (T2z - T2A);
Chris@82 343 Rp[WS(rs, 4)] = KP500000000 * (T2z + T2A);
Chris@82 344 T2B = T1h - TH;
Chris@82 345 Ip[WS(rs, 4)] = KP500000000 * (T2B + T2Y);
Chris@82 346 Im[WS(rs, 3)] = KP500000000 * (T2Y - T2B);
Chris@82 347 }
Chris@82 348 {
Chris@82 349 E T33, T3a, T3j, T3k;
Chris@82 350 T33 = T31 + T32;
Chris@82 351 T3a = T36 + T39;
Chris@82 352 Ip[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3a, T33));
Chris@82 353 Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP707106781, T3a, T33)));
Chris@82 354 T3j = T3b + T3c;
Chris@82 355 T3k = T3g + T3h;
Chris@82 356 Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP707106781, T3k, T3j));
Chris@82 357 Rp[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3k, T3j));
Chris@82 358 }
Chris@82 359 {
Chris@82 360 E T3d, T3e, T3f, T3i;
Chris@82 361 T3d = T3b - T3c;
Chris@82 362 T3e = T39 - T36;
Chris@82 363 Rm[WS(rs, 1)] = KP500000000 * (FNMS(KP707106781, T3e, T3d));
Chris@82 364 Rp[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3e, T3d));
Chris@82 365 T3f = T32 - T31;
Chris@82 366 T3i = T3g - T3h;
Chris@82 367 Ip[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3i, T3f));
Chris@82 368 Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP707106781, T3i, T3f)));
Chris@82 369 }
Chris@82 370 }
/* Combine and store: Rp/Ip at indices 1, 3, 5, 7 and Rm/Im at indices 0, 2, 4, 6. */
Chris@82 371 {
Chris@82 372 E T3n, T3Z, T44, T4e, T47, T4f, T3u, T4a, T3C, T3U, T3N, T49, T3Q, T40, T3J;
Chris@82 373 E T3V;
Chris@82 374 {
Chris@82 375 E T3l, T3m, T42, T43;
Chris@82 376 T3l = T1f - T1c;
Chris@82 377 T3m = T2j - T2l;
Chris@82 378 T3n = T3l - T3m;
Chris@82 379 T3Z = T3m + T3l;
Chris@82 380 T42 = T3w - T3x;
Chris@82 381 T43 = T3A - T3z;
Chris@82 382 T44 = FMA(KP414213562, T43, T42);
Chris@82 383 T4e = FNMS(KP414213562, T42, T43);
Chris@82 384 }
Chris@82 385 {
Chris@82 386 E T45, T46, T3q, T3t;
Chris@82 387 T45 = T3E + T3D;
Chris@82 388 T46 = T3H - T3G;
Chris@82 389 T47 = FMA(KP414213562, T46, T45);
Chris@82 390 T4f = FNMS(KP414213562, T45, T46);
Chris@82 391 T3q = T3o - T3p;
Chris@82 392 T3t = T3r + T3s;
Chris@82 393 T3u = T3q + T3t;
Chris@82 394 T4a = T3q - T3t;
Chris@82 395 }
Chris@82 396 {
Chris@82 397 E T3y, T3B, T3L, T3M;
Chris@82 398 T3y = T3w + T3x;
Chris@82 399 T3B = T3z + T3A;
Chris@82 400 T3C = FMA(KP414213562, T3B, T3y);
Chris@82 401 T3U = FNMS(KP414213562, T3y, T3B);
Chris@82 402 T3L = T2e - T2g;
Chris@82 403 T3M = TR - T11;
Chris@82 404 T3N = T3L + T3M;
Chris@82 405 T49 = T3L - T3M;
Chris@82 406 }
Chris@82 407 {
Chris@82 408 E T3O, T3P, T3F, T3I;
Chris@82 409 T3O = T3p + T3o;
Chris@82 410 T3P = T3r - T3s;
Chris@82 411 T3Q = T3O + T3P;
Chris@82 412 T40 = T3P - T3O;
Chris@82 413 T3F = T3D - T3E;
Chris@82 414 T3I = T3G + T3H;
Chris@82 415 T3J = FNMS(KP414213562, T3I, T3F);
Chris@82 416 T3V = FMA(KP414213562, T3F, T3I);
Chris@82 417 }
Chris@82 418 {
Chris@82 419 E T3v, T3K, T3X, T3Y;
Chris@82 420 T3v = FMA(KP707106781, T3u, T3n);
Chris@82 421 T3K = T3C + T3J;
Chris@82 422 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3K, T3v));
Chris@82 423 Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP923879532, T3K, T3v)));
Chris@82 424 T3X = FMA(KP707106781, T3Q, T3N);
Chris@82 425 T3Y = T3U + T3V;
Chris@82 426 Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP923879532, T3Y, T3X));
Chris@82 427 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3Y, T3X));
Chris@82 428 }
Chris@82 429 {
Chris@82 430 E T3R, T3S, T3T, T3W;
Chris@82 431 T3R = FNMS(KP707106781, T3Q, T3N);
Chris@82 432 T3S = T3J - T3C;
Chris@82 433 Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP923879532, T3S, T3R));
Chris@82 434 Rp[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T3S, T3R));
Chris@82 435 T3T = FNMS(KP707106781, T3u, T3n);
Chris@82 436 T3W = T3U - T3V;
Chris@82 437 Ip[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T3W, T3T));
Chris@82 438 Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP923879532, T3W, T3T)));
Chris@82 439 }
Chris@82 440 {
Chris@82 441 E T41, T48, T4h, T4i;
Chris@82 442 T41 = FNMS(KP707106781, T40, T3Z);
Chris@82 443 T48 = T44 - T47;
Chris@82 444 Ip[WS(rs, 7)] = KP500000000 * (FMA(KP923879532, T48, T41));
Chris@82 445 Im[0] = -(KP500000000 * (FNMS(KP923879532, T48, T41)));
Chris@82 446 T4h = FNMS(KP707106781, T4a, T49);
Chris@82 447 T4i = T4e + T4f;
Chris@82 448 Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP923879532, T4i, T4h));
Chris@82 449 Rm[0] = KP500000000 * (FMA(KP923879532, T4i, T4h));
Chris@82 450 }
Chris@82 451 {
Chris@82 452 E T4b, T4c, T4d, T4g;
Chris@82 453 T4b = FMA(KP707106781, T4a, T49);
Chris@82 454 T4c = T44 + T47;
Chris@82 455 Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP923879532, T4c, T4b));
Chris@82 456 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4c, T4b));
Chris@82 457 T4d = FMA(KP707106781, T40, T3Z);
Chris@82 458 T4g = T4e - T4f;
Chris@82 459 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4g, T4d));
Chris@82 460 Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP923879532, T4g, T4d)));
Chris@82 461 }
Chris@82 462 }
Chris@82 463 }
Chris@82 464 }
Chris@82 465 }
Chris@82 466
/*
 * Twiddle-table description handed to the planner through desc.
 * NOTE(review): presumably {TW_FULL, 1, 16} requests the full twiddle set
 * for a size-16 transform (the codelet consumes 30 W values per iteration)
 * and {TW_NEXT, 1, 0} terminates the list -- confirm against the tw_instr
 * definition, which is not visible in this file.
 */
Chris@82 467 static const tw_instr twinstr[] = {
Chris@82 468 {TW_FULL, 1, 16},
Chris@82 469 {TW_NEXT, 1, 0}
Chris@82 470 };
Chris@82 471
/* Codelet descriptor (FMA build): size 16, codelet name, twiddle instructions, genus, and the op counts {136, 62, 70, 0}, whose first three entries equal the add / mul / fused multiply-add counts quoted in the generator comment for this variant. */
Chris@82 472 static const hc2c_desc desc = { 16, "hc2cfdft_16", twinstr, &GENUS, {136, 62, 70, 0} };
Chris@82 473
/* Registration entry point (FMA build): hands hc2cfdft_16 and its descriptor to planner p via X(khc2c_register), tagged HC2C_VIA_DFT. */
Chris@82 474 void X(codelet_hc2cfdft_16) (planner *p) {
Chris@82 475 X(khc2c_register) (p, hc2cfdft_16, &desc, HC2C_VIA_DFT);
Chris@82 476 }
Chris@82 477 #else
Chris@82 478
Chris@82 479 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 16 -dit -name hc2cfdft_16 -include rdft/scalar/hc2cf.h */
Chris@82 480
Chris@82 481 /*
Chris@82 482 * This function contains 206 FP additions, 100 FP multiplications,
Chris@82 483 * (or, 168 additions, 62 multiplications, 38 fused multiply/add),
Chris@82 484 * 61 stack variables, 4 constants, and 64 memory accesses
Chris@82 485 */
Chris@82 486 #include "rdft/scalar/hc2cf.h"
Chris@82 487
/*
 * hc2cfdft_16 (non-FMA variant): genfft-generated size-16 hc2c codelet.
 *
 * For every m in [mb, me) the loop body reads eight elements (indices
 * WS(rs, 0) .. WS(rs, 7)) from each of Rp, Ip, Rm and Im, combines them
 * with the 30 twiddle values W[0..29], and stores 32 results back in place
 * into the same four arrays.
 *
 * Per iteration Rp and Ip advance by +ms, Rm and Im advance by -ms, and W
 * advances by 30. Before the first iteration W is offset by (mb - 1) * 30.
 */
Chris@82 488 static void hc2cfdft_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 489 {
Chris@82 490 DK(KP461939766, +0.461939766255643378064091594698394143411208313); /* cos(pi/8) / 2 */
Chris@82 491 DK(KP191341716, +0.191341716182544885864229992015199433380672281); /* sin(pi/8) / 2 */
Chris@82 492 DK(KP353553390, +0.353553390593273762200422181052424519642417969); /* sqrt(2) / 4 */
Chris@82 493 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
Chris@82 494 {
Chris@82 495 INT m;
/* One pass per m; all pointer and twiddle increments live in the for-header. */
Chris@82 496 for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(64, rs)) {
Chris@82 497 E T19, T3h, T21, T2Y, T1o, T3d, T2s, T39, TW, T3i, T24, T2Z, T1z, T3c, T2p;
Chris@82 498 E T3a, Tj, T2S, T28, T2R, T1L, T36, T2i, T32, TC, T2V, T2b, T2U, T1W, T35;
Chris@82 499 E T2l, T33;
/* Load inputs at indices 4 and 0; multiply by W[14..17] and W[0..1]. */
Chris@82 500 {
Chris@82 501 E T10, T1m, T14, T1k, T18, T1h, T1f, T1Z;
Chris@82 502 {
Chris@82 503 E TY, TZ, T12, T13;
Chris@82 504 TY = Ip[WS(rs, 4)];
Chris@82 505 TZ = Im[WS(rs, 4)];
Chris@82 506 T10 = TY - TZ;
Chris@82 507 T1m = TY + TZ;
Chris@82 508 T12 = Rp[WS(rs, 4)];
Chris@82 509 T13 = Rm[WS(rs, 4)];
Chris@82 510 T14 = T12 + T13;
Chris@82 511 T1k = T12 - T13;
Chris@82 512 }
Chris@82 513 {
Chris@82 514 E T16, T17, T1d, T1e;
Chris@82 515 T16 = Ip[0];
Chris@82 516 T17 = Im[0];
Chris@82 517 T18 = T16 - T17;
Chris@82 518 T1h = T16 + T17;
Chris@82 519 T1d = Rm[0];
Chris@82 520 T1e = Rp[0];
Chris@82 521 T1f = T1d - T1e;
Chris@82 522 T1Z = T1e + T1d;
Chris@82 523 }
Chris@82 524 {
Chris@82 525 E T15, T20, TX, T11;
Chris@82 526 TX = W[14];
Chris@82 527 T11 = W[15];
Chris@82 528 T15 = FNMS(T11, T14, TX * T10);
Chris@82 529 T20 = FMA(TX, T14, T11 * T10);
Chris@82 530 T19 = T15 + T18;
Chris@82 531 T3h = T1Z - T20;
Chris@82 532 T21 = T1Z + T20;
Chris@82 533 T2Y = T18 - T15;
Chris@82 534 }
Chris@82 535 {
Chris@82 536 E T1i, T2r, T1n, T2q;
Chris@82 537 {
Chris@82 538 E T1c, T1g, T1j, T1l;
Chris@82 539 T1c = W[0];
Chris@82 540 T1g = W[1];
Chris@82 541 T1i = FNMS(T1g, T1h, T1c * T1f);
Chris@82 542 T2r = FMA(T1g, T1f, T1c * T1h);
Chris@82 543 T1j = W[16];
Chris@82 544 T1l = W[17];
Chris@82 545 T1n = FMA(T1j, T1k, T1l * T1m);
Chris@82 546 T2q = FNMS(T1l, T1k, T1j * T1m);
Chris@82 547 }
Chris@82 548 T1o = T1i - T1n;
Chris@82 549 T3d = T2r - T2q;
Chris@82 550 T2s = T2q + T2r;
Chris@82 551 T39 = T1n + T1i;
Chris@82 552 }
Chris@82 553 }
/* Load inputs at indices 2 and 6; multiply by W[6..9] and W[22..25]. */
Chris@82 554 {
Chris@82 555 E TH, T1s, TL, T1q, TQ, T1x, TU, T1v;
Chris@82 556 {
Chris@82 557 E TF, TG, TJ, TK;
Chris@82 558 TF = Ip[WS(rs, 2)];
Chris@82 559 TG = Im[WS(rs, 2)];
Chris@82 560 TH = TF - TG;
Chris@82 561 T1s = TF + TG;
Chris@82 562 TJ = Rp[WS(rs, 2)];
Chris@82 563 TK = Rm[WS(rs, 2)];
Chris@82 564 TL = TJ + TK;
Chris@82 565 T1q = TJ - TK;
Chris@82 566 }
Chris@82 567 {
Chris@82 568 E TO, TP, TS, TT;
Chris@82 569 TO = Ip[WS(rs, 6)];
Chris@82 570 TP = Im[WS(rs, 6)];
Chris@82 571 TQ = TO - TP;
Chris@82 572 T1x = TO + TP;
Chris@82 573 TS = Rp[WS(rs, 6)];
Chris@82 574 TT = Rm[WS(rs, 6)];
Chris@82 575 TU = TS + TT;
Chris@82 576 T1v = TS - TT;
Chris@82 577 }
Chris@82 578 {
Chris@82 579 E TM, T22, TV, T23;
Chris@82 580 {
Chris@82 581 E TE, TI, TN, TR;
Chris@82 582 TE = W[6];
Chris@82 583 TI = W[7];
Chris@82 584 TM = FNMS(TI, TL, TE * TH);
Chris@82 585 T22 = FMA(TE, TL, TI * TH);
Chris@82 586 TN = W[22];
Chris@82 587 TR = W[23];
Chris@82 588 TV = FNMS(TR, TU, TN * TQ);
Chris@82 589 T23 = FMA(TN, TU, TR * TQ);
Chris@82 590 }
Chris@82 591 TW = TM + TV;
Chris@82 592 T3i = TM - TV;
Chris@82 593 T24 = T22 + T23;
Chris@82 594 T2Z = T22 - T23;
Chris@82 595 }
Chris@82 596 {
Chris@82 597 E T1t, T2n, T1y, T2o;
Chris@82 598 {
Chris@82 599 E T1p, T1r, T1u, T1w;
Chris@82 600 T1p = W[8];
Chris@82 601 T1r = W[9];
Chris@82 602 T1t = FMA(T1p, T1q, T1r * T1s);
Chris@82 603 T2n = FNMS(T1r, T1q, T1p * T1s);
Chris@82 604 T1u = W[24];
Chris@82 605 T1w = W[25];
Chris@82 606 T1y = FMA(T1u, T1v, T1w * T1x);
Chris@82 607 T2o = FNMS(T1w, T1v, T1u * T1x);
Chris@82 608 }
Chris@82 609 T1z = T1t + T1y;
Chris@82 610 T3c = T1y - T1t;
Chris@82 611 T2p = T2n + T2o;
Chris@82 612 T3a = T2n - T2o;
Chris@82 613 }
Chris@82 614 }
/* Load inputs at indices 1 and 5; multiply by W[2..5] and W[18..21]. */
Chris@82 615 {
Chris@82 616 E T4, T1E, T8, T1C, Td, T1J, Th, T1H;
Chris@82 617 {
Chris@82 618 E T2, T3, T6, T7;
Chris@82 619 T2 = Ip[WS(rs, 1)];
Chris@82 620 T3 = Im[WS(rs, 1)];
Chris@82 621 T4 = T2 - T3;
Chris@82 622 T1E = T2 + T3;
Chris@82 623 T6 = Rp[WS(rs, 1)];
Chris@82 624 T7 = Rm[WS(rs, 1)];
Chris@82 625 T8 = T6 + T7;
Chris@82 626 T1C = T6 - T7;
Chris@82 627 }
Chris@82 628 {
Chris@82 629 E Tb, Tc, Tf, Tg;
Chris@82 630 Tb = Ip[WS(rs, 5)];
Chris@82 631 Tc = Im[WS(rs, 5)];
Chris@82 632 Td = Tb - Tc;
Chris@82 633 T1J = Tb + Tc;
Chris@82 634 Tf = Rp[WS(rs, 5)];
Chris@82 635 Tg = Rm[WS(rs, 5)];
Chris@82 636 Th = Tf + Tg;
Chris@82 637 T1H = Tf - Tg;
Chris@82 638 }
Chris@82 639 {
Chris@82 640 E T9, T26, Ti, T27;
Chris@82 641 {
Chris@82 642 E T1, T5, Ta, Te;
Chris@82 643 T1 = W[2];
Chris@82 644 T5 = W[3];
Chris@82 645 T9 = FNMS(T5, T8, T1 * T4);
Chris@82 646 T26 = FMA(T1, T8, T5 * T4);
Chris@82 647 Ta = W[18];
Chris@82 648 Te = W[19];
Chris@82 649 Ti = FNMS(Te, Th, Ta * Td);
Chris@82 650 T27 = FMA(Ta, Th, Te * Td);
Chris@82 651 }
Chris@82 652 Tj = T9 + Ti;
Chris@82 653 T2S = T26 - T27;
Chris@82 654 T28 = T26 + T27;
Chris@82 655 T2R = T9 - Ti;
Chris@82 656 }
Chris@82 657 {
Chris@82 658 E T1F, T2g, T1K, T2h;
Chris@82 659 {
Chris@82 660 E T1B, T1D, T1G, T1I;
Chris@82 661 T1B = W[4];
Chris@82 662 T1D = W[5];
Chris@82 663 T1F = FMA(T1B, T1C, T1D * T1E);
Chris@82 664 T2g = FNMS(T1D, T1C, T1B * T1E);
Chris@82 665 T1G = W[20];
Chris@82 666 T1I = W[21];
Chris@82 667 T1K = FMA(T1G, T1H, T1I * T1J);
Chris@82 668 T2h = FNMS(T1I, T1H, T1G * T1J);
Chris@82 669 }
Chris@82 670 T1L = T1F + T1K;
Chris@82 671 T36 = T2g - T2h;
Chris@82 672 T2i = T2g + T2h;
Chris@82 673 T32 = T1K - T1F;
Chris@82 674 }
Chris@82 675 }
/* Load inputs at indices 7 and 3; multiply by W[26..29] and W[10..13]. */
Chris@82 676 {
Chris@82 677 E Tn, T1P, Tr, T1N, Tw, T1U, TA, T1S;
Chris@82 678 {
Chris@82 679 E Tl, Tm, Tp, Tq;
Chris@82 680 Tl = Ip[WS(rs, 7)];
Chris@82 681 Tm = Im[WS(rs, 7)];
Chris@82 682 Tn = Tl - Tm;
Chris@82 683 T1P = Tl + Tm;
Chris@82 684 Tp = Rp[WS(rs, 7)];
Chris@82 685 Tq = Rm[WS(rs, 7)];
Chris@82 686 Tr = Tp + Tq;
Chris@82 687 T1N = Tp - Tq;
Chris@82 688 }
Chris@82 689 {
Chris@82 690 E Tu, Tv, Ty, Tz;
Chris@82 691 Tu = Ip[WS(rs, 3)];
Chris@82 692 Tv = Im[WS(rs, 3)];
Chris@82 693 Tw = Tu - Tv;
Chris@82 694 T1U = Tu + Tv;
Chris@82 695 Ty = Rp[WS(rs, 3)];
Chris@82 696 Tz = Rm[WS(rs, 3)];
Chris@82 697 TA = Ty + Tz;
Chris@82 698 T1S = Ty - Tz;
Chris@82 699 }
Chris@82 700 {
Chris@82 701 E Ts, T29, TB, T2a;
Chris@82 702 {
Chris@82 703 E Tk, To, Tt, Tx;
Chris@82 704 Tk = W[26];
Chris@82 705 To = W[27];
Chris@82 706 Ts = FNMS(To, Tr, Tk * Tn);
Chris@82 707 T29 = FMA(Tk, Tr, To * Tn);
Chris@82 708 Tt = W[10];
Chris@82 709 Tx = W[11];
Chris@82 710 TB = FNMS(Tx, TA, Tt * Tw);
Chris@82 711 T2a = FMA(Tt, TA, Tx * Tw);
Chris@82 712 }
Chris@82 713 TC = Ts + TB;
Chris@82 714 T2V = Ts - TB;
Chris@82 715 T2b = T29 + T2a;
Chris@82 716 T2U = T29 - T2a;
Chris@82 717 }
Chris@82 718 {
Chris@82 719 E T1Q, T2j, T1V, T2k;
Chris@82 720 {
Chris@82 721 E T1M, T1O, T1R, T1T;
Chris@82 722 T1M = W[28];
Chris@82 723 T1O = W[29];
Chris@82 724 T1Q = FMA(T1M, T1N, T1O * T1P);
Chris@82 725 T2j = FNMS(T1O, T1N, T1M * T1P);
Chris@82 726 T1R = W[12];
Chris@82 727 T1T = W[13];
Chris@82 728 T1V = FMA(T1R, T1S, T1T * T1U);
Chris@82 729 T2k = FNMS(T1T, T1S, T1R * T1U);
Chris@82 730 }
Chris@82 731 T1W = T1Q + T1V;
Chris@82 732 T35 = T1V - T1Q;
Chris@82 733 T2l = T2j + T2k;
Chris@82 734 T33 = T2j - T2k;
Chris@82 735 }
Chris@82 736 }
/* Combine and store: Rp/Ip at indices 0 and 4, Rm/Im at indices 7 and 3. */
Chris@82 737 {
Chris@82 738 E T1b, T2f, T2u, T2w, T1Y, T2e, T2d, T2v;
Chris@82 739 {
Chris@82 740 E TD, T1a, T2m, T2t;
Chris@82 741 TD = Tj + TC;
Chris@82 742 T1a = TW + T19;
Chris@82 743 T1b = TD + T1a;
Chris@82 744 T2f = T1a - TD;
Chris@82 745 T2m = T2i + T2l;
Chris@82 746 T2t = T2p + T2s;
Chris@82 747 T2u = T2m - T2t;
Chris@82 748 T2w = T2m + T2t;
Chris@82 749 }
Chris@82 750 {
Chris@82 751 E T1A, T1X, T25, T2c;
Chris@82 752 T1A = T1o - T1z;
Chris@82 753 T1X = T1L + T1W;
Chris@82 754 T1Y = T1A - T1X;
Chris@82 755 T2e = T1X + T1A;
Chris@82 756 T25 = T21 + T24;
Chris@82 757 T2c = T28 + T2b;
Chris@82 758 T2d = T25 - T2c;
Chris@82 759 T2v = T25 + T2c;
Chris@82 760 }
Chris@82 761 Ip[0] = KP500000000 * (T1b + T1Y);
Chris@82 762 Rp[0] = KP500000000 * (T2v + T2w);
Chris@82 763 Im[WS(rs, 7)] = KP500000000 * (T1Y - T1b);
Chris@82 764 Rm[WS(rs, 7)] = KP500000000 * (T2v - T2w);
Chris@82 765 Rm[WS(rs, 3)] = KP500000000 * (T2d - T2e);
Chris@82 766 Im[WS(rs, 3)] = KP500000000 * (T2u - T2f);
Chris@82 767 Rp[WS(rs, 4)] = KP500000000 * (T2d + T2e);
Chris@82 768 Ip[WS(rs, 4)] = KP500000000 * (T2f + T2u);
Chris@82 769 }
/* Combine and store: Rp/Ip at indices 2 and 6, Rm/Im at indices 5 and 1. */
Chris@82 770 {
Chris@82 771 E T2z, T2L, T2J, T2P, T2C, T2M, T2F, T2N;
Chris@82 772 {
Chris@82 773 E T2x, T2y, T2H, T2I;
Chris@82 774 T2x = T2b - T28;
Chris@82 775 T2y = T19 - TW;
Chris@82 776 T2z = KP500000000 * (T2x + T2y);
Chris@82 777 T2L = KP500000000 * (T2y - T2x);
Chris@82 778 T2H = T21 - T24;
Chris@82 779 T2I = Tj - TC;
Chris@82 780 T2J = KP500000000 * (T2H - T2I);
Chris@82 781 T2P = KP500000000 * (T2H + T2I);
Chris@82 782 }
Chris@82 783 {
Chris@82 784 E T2A, T2B, T2D, T2E;
Chris@82 785 T2A = T2l - T2i;
Chris@82 786 T2B = T1L - T1W;
Chris@82 787 T2C = T2A + T2B;
Chris@82 788 T2M = T2A - T2B;
Chris@82 789 T2D = T1z + T1o;
Chris@82 790 T2E = T2s - T2p;
Chris@82 791 T2F = T2D - T2E;
Chris@82 792 T2N = T2D + T2E;
Chris@82 793 }
Chris@82 794 {
Chris@82 795 E T2G, T2Q, T2K, T2O;
Chris@82 796 T2G = KP353553390 * (T2C + T2F);
Chris@82 797 Ip[WS(rs, 2)] = T2z + T2G;
Chris@82 798 Im[WS(rs, 5)] = T2G - T2z;
Chris@82 799 T2Q = KP353553390 * (T2M + T2N);
Chris@82 800 Rm[WS(rs, 5)] = T2P - T2Q;
Chris@82 801 Rp[WS(rs, 2)] = T2P + T2Q;
Chris@82 802 T2K = KP353553390 * (T2F - T2C);
Chris@82 803 Rm[WS(rs, 1)] = T2J - T2K;
Chris@82 804 Rp[WS(rs, 6)] = T2J + T2K;
Chris@82 805 T2O = KP353553390 * (T2M - T2N);
Chris@82 806 Ip[WS(rs, 6)] = T2L + T2O;
Chris@82 807 Im[WS(rs, 1)] = T2O - T2L;
Chris@82 808 }
Chris@82 809 }
/* Combine and store: Rp/Ip at indices 1, 3, 5, 7 and Rm/Im at indices 0, 2, 4, 6. */
Chris@82 810 {
Chris@82 811 E T30, T3w, T3F, T3j, T2X, T3G, T3D, T3L, T3m, T3v, T38, T3q, T3A, T3K, T3f;
Chris@82 812 E T3r;
Chris@82 813 {
Chris@82 814 E T2T, T2W, T34, T37;
Chris@82 815 T30 = KP500000000 * (T2Y - T2Z);
Chris@82 816 T3w = KP500000000 * (T2Z + T2Y);
Chris@82 817 T3F = KP500000000 * (T3h - T3i);
Chris@82 818 T3j = KP500000000 * (T3h + T3i);
Chris@82 819 T2T = T2R - T2S;
Chris@82 820 T2W = T2U + T2V;
Chris@82 821 T2X = KP353553390 * (T2T + T2W);
Chris@82 822 T3G = KP353553390 * (T2T - T2W);
Chris@82 823 {
Chris@82 824 E T3B, T3C, T3k, T3l;
Chris@82 825 T3B = T3a + T39;
Chris@82 826 T3C = T3d - T3c;
Chris@82 827 T3D = FNMS(KP461939766, T3C, KP191341716 * T3B);
Chris@82 828 T3L = FMA(KP461939766, T3B, KP191341716 * T3C);
Chris@82 829 T3k = T2S + T2R;
Chris@82 830 T3l = T2U - T2V;
Chris@82 831 T3m = KP353553390 * (T3k + T3l);
Chris@82 832 T3v = KP353553390 * (T3l - T3k);
Chris@82 833 }
Chris@82 834 T34 = T32 + T33;
Chris@82 835 T37 = T35 - T36;
Chris@82 836 T38 = FMA(KP191341716, T34, KP461939766 * T37);
Chris@82 837 T3q = FNMS(KP191341716, T37, KP461939766 * T34);
Chris@82 838 {
Chris@82 839 E T3y, T3z, T3b, T3e;
Chris@82 840 T3y = T33 - T32;
Chris@82 841 T3z = T36 + T35;
Chris@82 842 T3A = FMA(KP461939766, T3y, KP191341716 * T3z);
Chris@82 843 T3K = FNMS(KP461939766, T3z, KP191341716 * T3y);
Chris@82 844 T3b = T39 - T3a;
Chris@82 845 T3e = T3c + T3d;
Chris@82 846 T3f = FNMS(KP191341716, T3e, KP461939766 * T3b);
Chris@82 847 T3r = FMA(KP191341716, T3b, KP461939766 * T3e);
Chris@82 848 }
Chris@82 849 }
Chris@82 850 {
Chris@82 851 E T31, T3g, T3t, T3u;
Chris@82 852 T31 = T2X + T30;
Chris@82 853 T3g = T38 + T3f;
Chris@82 854 Ip[WS(rs, 1)] = T31 + T3g;
Chris@82 855 Im[WS(rs, 6)] = T3g - T31;
Chris@82 856 T3t = T3j + T3m;
Chris@82 857 T3u = T3q + T3r;
Chris@82 858 Rm[WS(rs, 6)] = T3t - T3u;
Chris@82 859 Rp[WS(rs, 1)] = T3t + T3u;
Chris@82 860 }
Chris@82 861 {
Chris@82 862 E T3n, T3o, T3p, T3s;
Chris@82 863 T3n = T3j - T3m;
Chris@82 864 T3o = T3f - T38;
Chris@82 865 Rm[WS(rs, 2)] = T3n - T3o;
Chris@82 866 Rp[WS(rs, 5)] = T3n + T3o;
Chris@82 867 T3p = T30 - T2X;
Chris@82 868 T3s = T3q - T3r;
Chris@82 869 Ip[WS(rs, 5)] = T3p + T3s;
Chris@82 870 Im[WS(rs, 2)] = T3s - T3p;
Chris@82 871 }
Chris@82 872 {
Chris@82 873 E T3x, T3E, T3N, T3O;
Chris@82 874 T3x = T3v + T3w;
Chris@82 875 T3E = T3A + T3D;
Chris@82 876 Ip[WS(rs, 3)] = T3x + T3E;
Chris@82 877 Im[WS(rs, 4)] = T3E - T3x;
Chris@82 878 T3N = T3F + T3G;
Chris@82 879 T3O = T3K + T3L;
Chris@82 880 Rm[WS(rs, 4)] = T3N - T3O;
Chris@82 881 Rp[WS(rs, 3)] = T3N + T3O;
Chris@82 882 }
Chris@82 883 {
Chris@82 884 E T3H, T3I, T3J, T3M;
Chris@82 885 T3H = T3F - T3G;
Chris@82 886 T3I = T3D - T3A;
Chris@82 887 Rm[0] = T3H - T3I;
Chris@82 888 Rp[WS(rs, 7)] = T3H + T3I;
Chris@82 889 T3J = T3w - T3v;
Chris@82 890 T3M = T3K - T3L;
Chris@82 891 Ip[WS(rs, 7)] = T3J + T3M;
Chris@82 892 Im[0] = T3M - T3J;
Chris@82 893 }
Chris@82 894 }
Chris@82 895 }
Chris@82 896 }
Chris@82 897 }
Chris@82 898
/*
 * Twiddle-table description handed to the planner through desc.
 * NOTE(review): presumably {TW_FULL, 1, 16} requests the full twiddle set
 * for a size-16 transform (the codelet consumes 30 W values per iteration)
 * and {TW_NEXT, 1, 0} terminates the list -- confirm against the tw_instr
 * definition, which is not visible in this file.
 */
Chris@82 899 static const tw_instr twinstr[] = {
Chris@82 900 {TW_FULL, 1, 16},
Chris@82 901 {TW_NEXT, 1, 0}
Chris@82 902 };
Chris@82 903
/* Codelet descriptor (non-FMA build): size 16, codelet name, twiddle instructions, genus, and the op counts {168, 62, 38, 0}, whose first three entries equal the add / mul / fused multiply-add counts quoted in the generator comment for this variant. */
Chris@82 904 static const hc2c_desc desc = { 16, "hc2cfdft_16", twinstr, &GENUS, {168, 62, 38, 0} };
Chris@82 905
/* Registration entry point (non-FMA build): hands hc2cfdft_16 and its descriptor to planner p via X(khc2c_register), tagged HC2C_VIA_DFT. */
Chris@82 906 void X(codelet_hc2cfdft_16) (planner *p) {
Chris@82 907 X(khc2c_register) (p, hc2cfdft_16, &desc, HC2C_VIA_DFT);
Chris@82 908 }
Chris@82 909 #endif