annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cf_12.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:57 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cf_12 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 118 FP additions, 68 FP multiplications,
Chris@82 32 * (or, 72 additions, 22 multiplications, 46 fused multiply/add),
Chris@82 33 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cf_12 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.  Machine-generated size-12 codelet
 * (generator flags recorded in this file: -fma -n 12 -dit).
 *
 * Parameters, as used by the body:
 *   Rp, Ip - arrays that are read and then overwritten; each pointer is
 *            advanced by +ms after every iteration.
 *   Rm, Im - arrays that are read and then overwritten; each pointer is
 *            advanced by -ms after every iteration.
 *   W      - twiddle table.  It is offset by (mb - 1) * 22 before the first
 *            iteration and advanced by 22 afterwards; W[0]..W[21] are read
 *            on every pass.
 *   rs     - stride passed to WS(rs, k) for k = 1..5 (WS is defined outside
 *            this file; presumably it yields k * rs -- TODO confirm).
 *   mb, me - half-open iteration range: m starts at mb and runs while m < me.
 *   ms     - per-iteration pointer step (+ms for Rp/Ip, -ms for Rm/Im).
 *
 * Per iteration:
 *   1. Rp[0] and Rm[0] are loaded unscaled.  Every other pair
 *      (Rp[k], Rm[k]), k = 1..5, is combined with (W[4k-2], W[4k-1]) and
 *      every pair (Ip[k], Im[k]), k = 0..5, with (W[4k], W[4k+1]).  For an
 *      input pair (a, b) and twiddles (w0, w1) the code forms
 *      FMA(w1, b, w0 * a) and FNMS(w1, a, w0 * b).
 *      NOTE(review): FMA/FNMS come from the codelet headers; presumably
 *      FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm there.
 *   2. The twelve resulting pairs are combined with sums/differences and the
 *      constants 0.5 and 0.866025403... (numerically sqrt(3)/2).
 *   3. The 24 results are stored back to exactly the 24 Rp/Ip/Rm/Im
 *      locations that were read, so the transform is in place.
 */
Chris@82 37 static void hc2cf_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT m;
/* One iteration per m in [mb, me); W consumes 22 entries per iteration. */
Chris@82 43 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
Chris@82 44 E T1, T2i, Tl, T2e, T10, T1Y, TG, T1S, Ty, T2s, T1s, T2f, T1d, T21, T1H;
Chris@82 45 E T1Z, Te, T2p, T1l, T2h, TT, T1V, T1A, T1T;
/* The only two inputs used without a twiddle product. */
Chris@82 46 T1 = Rp[0];
Chris@82 47 T2i = Rm[0];
/* (Rp, Rm)[3] combined with twiddles W[10], W[11]. */
Chris@82 48 {
Chris@82 49 E Th, Tk, Ti, T2d, Tg, Tj;
Chris@82 50 Th = Rp[WS(rs, 3)];
Chris@82 51 Tk = Rm[WS(rs, 3)];
Chris@82 52 Tg = W[10];
Chris@82 53 Ti = Tg * Th;
Chris@82 54 T2d = Tg * Tk;
Chris@82 55 Tj = W[11];
Chris@82 56 Tl = FMA(Tj, Tk, Ti);
Chris@82 57 T2e = FNMS(Tj, Th, T2d);
Chris@82 58 }
/* (Ip, Im)[4] combined with twiddles W[16], W[17]. */
Chris@82 59 {
Chris@82 60 E TW, TZ, TX, T1X, TV, TY;
Chris@82 61 TW = Ip[WS(rs, 4)];
Chris@82 62 TZ = Im[WS(rs, 4)];
Chris@82 63 TV = W[16];
Chris@82 64 TX = TV * TW;
Chris@82 65 T1X = TV * TZ;
Chris@82 66 TY = W[17];
Chris@82 67 T10 = FMA(TY, TZ, TX);
Chris@82 68 T1Y = FNMS(TY, TW, T1X);
Chris@82 69 }
/* (Ip, Im)[1] combined with twiddles W[4], W[5]. */
Chris@82 70 {
Chris@82 71 E TC, TF, TD, T1R, TB, TE;
Chris@82 72 TC = Ip[WS(rs, 1)];
Chris@82 73 TF = Im[WS(rs, 1)];
Chris@82 74 TB = W[4];
Chris@82 75 TD = TB * TC;
Chris@82 76 T1R = TB * TF;
Chris@82 77 TE = W[5];
Chris@82 78 TG = FMA(TE, TF, TD);
Chris@82 79 T1S = FNMS(TE, TC, T1R);
Chris@82 80 }
/* (Rp, Rm)[5] with W[18], W[19] and (Rp, Rm)[1] with W[2], W[3];
 * their sums and differences are kept in Ty, T2s, T1s, T2f. */
Chris@82 81 {
Chris@82 82 E Tn, Tq, To, T1o, Tt, Tw, Tu, T1q, Tm, Ts;
Chris@82 83 Tn = Rp[WS(rs, 5)];
Chris@82 84 Tq = Rm[WS(rs, 5)];
Chris@82 85 Tm = W[18];
Chris@82 86 To = Tm * Tn;
Chris@82 87 T1o = Tm * Tq;
Chris@82 88 Tt = Rp[WS(rs, 1)];
Chris@82 89 Tw = Rm[WS(rs, 1)];
Chris@82 90 Ts = W[2];
Chris@82 91 Tu = Ts * Tt;
Chris@82 92 T1q = Ts * Tw;
Chris@82 93 {
Chris@82 94 E Tr, T1p, Tx, T1r, Tp, Tv;
Chris@82 95 Tp = W[19];
Chris@82 96 Tr = FMA(Tp, Tq, To);
Chris@82 97 T1p = FNMS(Tp, Tn, T1o);
Chris@82 98 Tv = W[3];
Chris@82 99 Tx = FMA(Tv, Tw, Tu);
Chris@82 100 T1r = FNMS(Tv, Tt, T1q);
Chris@82 101 Ty = Tr + Tx;
Chris@82 102 T2s = Tx - Tr;
Chris@82 103 T1s = T1p - T1r;
Chris@82 104 T2f = T1p + T1r;
Chris@82 105 }
Chris@82 106 }
/* (Ip, Im)[0] with W[0], W[1] and (Ip, Im)[2] with W[8], W[9];
 * their sums and differences are kept in T1d, T21, T1H, T1Z. */
Chris@82 107 {
Chris@82 108 E T12, T15, T13, T1D, T18, T1b, T19, T1F, T11, T17;
Chris@82 109 T12 = Ip[0];
Chris@82 110 T15 = Im[0];
Chris@82 111 T11 = W[0];
Chris@82 112 T13 = T11 * T12;
Chris@82 113 T1D = T11 * T15;
Chris@82 114 T18 = Ip[WS(rs, 2)];
Chris@82 115 T1b = Im[WS(rs, 2)];
Chris@82 116 T17 = W[8];
Chris@82 117 T19 = T17 * T18;
Chris@82 118 T1F = T17 * T1b;
Chris@82 119 {
Chris@82 120 E T16, T1E, T1c, T1G, T14, T1a;
Chris@82 121 T14 = W[1];
Chris@82 122 T16 = FMA(T14, T15, T13);
Chris@82 123 T1E = FNMS(T14, T12, T1D);
Chris@82 124 T1a = W[9];
Chris@82 125 T1c = FMA(T1a, T1b, T19);
Chris@82 126 T1G = FNMS(T1a, T18, T1F);
Chris@82 127 T1d = T16 + T1c;
Chris@82 128 T21 = T1c - T16;
Chris@82 129 T1H = T1E - T1G;
Chris@82 130 T1Z = T1E + T1G;
Chris@82 131 }
Chris@82 132 }
/* (Rp, Rm)[2] with W[6], W[7] and (Rp, Rm)[4] with W[14], W[15];
 * their sums and differences are kept in Te, T2p, T1l, T2h. */
Chris@82 133 {
Chris@82 134 E T3, T6, T4, T1h, T9, Tc, Ta, T1j, T2, T8;
Chris@82 135 T3 = Rp[WS(rs, 2)];
Chris@82 136 T6 = Rm[WS(rs, 2)];
Chris@82 137 T2 = W[6];
Chris@82 138 T4 = T2 * T3;
Chris@82 139 T1h = T2 * T6;
Chris@82 140 T9 = Rp[WS(rs, 4)];
Chris@82 141 Tc = Rm[WS(rs, 4)];
Chris@82 142 T8 = W[14];
Chris@82 143 Ta = T8 * T9;
Chris@82 144 T1j = T8 * Tc;
Chris@82 145 {
Chris@82 146 E T7, T1i, Td, T1k, T5, Tb;
Chris@82 147 T5 = W[7];
Chris@82 148 T7 = FMA(T5, T6, T4);
Chris@82 149 T1i = FNMS(T5, T3, T1h);
Chris@82 150 Tb = W[15];
Chris@82 151 Td = FMA(Tb, Tc, Ta);
Chris@82 152 T1k = FNMS(Tb, T9, T1j);
Chris@82 153 Te = T7 + Td;
Chris@82 154 T2p = Td - T7;
Chris@82 155 T1l = T1i - T1k;
Chris@82 156 T2h = T1i + T1k;
Chris@82 157 }
Chris@82 158 }
/* (Ip, Im)[3] with W[12], W[13] and (Ip, Im)[5] with W[20], W[21];
 * their sums and differences are kept in TT, T1V, T1A, T1T. */
Chris@82 159 {
Chris@82 160 E TI, TL, TJ, T1w, TO, TR, TP, T1y, TH, TN;
Chris@82 161 TI = Ip[WS(rs, 3)];
Chris@82 162 TL = Im[WS(rs, 3)];
Chris@82 163 TH = W[12];
Chris@82 164 TJ = TH * TI;
Chris@82 165 T1w = TH * TL;
Chris@82 166 TO = Ip[WS(rs, 5)];
Chris@82 167 TR = Im[WS(rs, 5)];
Chris@82 168 TN = W[20];
Chris@82 169 TP = TN * TO;
Chris@82 170 T1y = TN * TR;
Chris@82 171 {
Chris@82 172 E TM, T1x, TS, T1z, TK, TQ;
Chris@82 173 TK = W[13];
Chris@82 174 TM = FMA(TK, TL, TJ);
Chris@82 175 T1x = FNMS(TK, TI, T1w);
Chris@82 176 TQ = W[21];
Chris@82 177 TS = FMA(TQ, TR, TP);
Chris@82 178 T1z = FNMS(TQ, TO, T1y);
Chris@82 179 TT = TM + TS;
Chris@82 180 T1V = TS - TM;
Chris@82 181 T1A = T1x - T1z;
Chris@82 182 T1T = T1x + T1z;
Chris@82 183 }
Chris@82 184 }
/* First group of eight outputs: built purely from sums/differences of the
 * values above (no KP constants involved). */
Chris@82 185 {
Chris@82 186 E TA, T28, T2k, T2m, T1f, T2l, T2b, T2c;
Chris@82 187 {
Chris@82 188 E Tf, Tz, T2g, T2j;
Chris@82 189 Tf = T1 + Te;
Chris@82 190 Tz = Tl + Ty;
Chris@82 191 TA = Tf + Tz;
Chris@82 192 T28 = Tf - Tz;
Chris@82 193 T2g = T2e + T2f;
Chris@82 194 T2j = T2h + T2i;
Chris@82 195 T2k = T2g + T2j;
Chris@82 196 T2m = T2j - T2g;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E TU, T1e, T29, T2a;
Chris@82 200 TU = TG + TT;
Chris@82 201 T1e = T10 + T1d;
Chris@82 202 T1f = TU + T1e;
Chris@82 203 T2l = TU - T1e;
Chris@82 204 T29 = T1S + T1T;
Chris@82 205 T2a = T1Y + T1Z;
Chris@82 206 T2b = T29 - T2a;
Chris@82 207 T2c = T29 + T2a;
Chris@82 208 }
Chris@82 209 Rm[WS(rs, 5)] = TA - T1f;
Chris@82 210 Im[WS(rs, 5)] = T2c - T2k;
Chris@82 211 Rp[0] = TA + T1f;
Chris@82 212 Ip[0] = T2c + T2k;
Chris@82 213 Rp[WS(rs, 3)] = T28 - T2b;
Chris@82 214 Ip[WS(rs, 3)] = T2l + T2m;
Chris@82 215 Rm[WS(rs, 2)] = T28 + T2b;
Chris@82 216 Im[WS(rs, 2)] = T2l - T2m;
Chris@82 217 }
/* Remaining sixteen outputs: each intermediate is formed with KP500000000
 * and KP866025403 before the final sums/differences are stored. */
Chris@82 218 {
Chris@82 219 E T1m, T1K, T2q, T2z, T2t, T2y, T1t, T1L, T1B, T1N, T1W, T25, T22, T26, T1I;
Chris@82 220 E T1O;
Chris@82 221 {
Chris@82 222 E T1g, T2o, T2r, T1n;
Chris@82 223 T1g = FNMS(KP500000000, Te, T1);
Chris@82 224 T1m = FNMS(KP866025403, T1l, T1g);
Chris@82 225 T1K = FMA(KP866025403, T1l, T1g);
Chris@82 226 T2o = FNMS(KP500000000, T2h, T2i);
Chris@82 227 T2q = FMA(KP866025403, T2p, T2o);
Chris@82 228 T2z = FNMS(KP866025403, T2p, T2o);
Chris@82 229 T2r = FNMS(KP500000000, T2f, T2e);
Chris@82 230 T2t = FMA(KP866025403, T2s, T2r);
Chris@82 231 T2y = FNMS(KP866025403, T2s, T2r);
Chris@82 232 T1n = FNMS(KP500000000, Ty, Tl);
Chris@82 233 T1t = FNMS(KP866025403, T1s, T1n);
Chris@82 234 T1L = FMA(KP866025403, T1s, T1n);
Chris@82 235 }
Chris@82 236 {
Chris@82 237 E T1v, T1U, T20, T1C;
Chris@82 238 T1v = FNMS(KP500000000, TT, TG);
Chris@82 239 T1B = FNMS(KP866025403, T1A, T1v);
Chris@82 240 T1N = FMA(KP866025403, T1A, T1v);
Chris@82 241 T1U = FNMS(KP500000000, T1T, T1S);
Chris@82 242 T1W = FNMS(KP866025403, T1V, T1U);
Chris@82 243 T25 = FMA(KP866025403, T1V, T1U);
Chris@82 244 T20 = FNMS(KP500000000, T1Z, T1Y);
Chris@82 245 T22 = FNMS(KP866025403, T21, T20);
Chris@82 246 T26 = FMA(KP866025403, T21, T20);
Chris@82 247 T1C = FNMS(KP500000000, T1d, T10);
Chris@82 248 T1I = FNMS(KP866025403, T1H, T1C);
Chris@82 249 T1O = FMA(KP866025403, T1H, T1C);
Chris@82 250 }
Chris@82 251 {
Chris@82 252 E T1u, T1J, T2x, T2A;
Chris@82 253 T1u = T1m + T1t;
Chris@82 254 T1J = T1B + T1I;
Chris@82 255 Rp[WS(rs, 2)] = T1u - T1J;
Chris@82 256 Rm[WS(rs, 3)] = T1u + T1J;
Chris@82 257 T2x = T1W + T22;
Chris@82 258 T2A = T2y + T2z;
Chris@82 259 Im[WS(rs, 3)] = -(T2x + T2A); /* the only store that negates its value */
Chris@82 260 Ip[WS(rs, 2)] = T2A - T2x;
Chris@82 261 }
Chris@82 262 {
Chris@82 263 E T1M, T1P, T2v, T2w;
Chris@82 264 T1M = T1K + T1L;
Chris@82 265 T1P = T1N + T1O;
Chris@82 266 Rm[WS(rs, 1)] = T1M - T1P;
Chris@82 267 Rp[WS(rs, 4)] = T1M + T1P;
Chris@82 268 T2v = T25 + T26;
Chris@82 269 T2w = T2t + T2q;
Chris@82 270 Im[WS(rs, 1)] = T2v - T2w;
Chris@82 271 Ip[WS(rs, 4)] = T2v + T2w;
Chris@82 272 }
Chris@82 273 {
Chris@82 274 E T1Q, T23, T2B, T2C;
Chris@82 275 T1Q = T1m - T1t;
Chris@82 276 T23 = T1W - T22;
Chris@82 277 Rm[0] = T1Q - T23;
Chris@82 278 Rp[WS(rs, 5)] = T1Q + T23;
Chris@82 279 T2B = T1I - T1B;
Chris@82 280 T2C = T2z - T2y;
Chris@82 281 Im[0] = T2B - T2C;
Chris@82 282 Ip[WS(rs, 5)] = T2B + T2C;
Chris@82 283 }
Chris@82 284 {
Chris@82 285 E T24, T27, T2n, T2u;
Chris@82 286 T24 = T1K - T1L;
Chris@82 287 T27 = T25 - T26;
Chris@82 288 Rm[WS(rs, 4)] = T24 - T27;
Chris@82 289 Rp[WS(rs, 1)] = T24 + T27;
Chris@82 290 T2n = T1O - T1N;
Chris@82 291 T2u = T2q - T2t;
Chris@82 292 Im[WS(rs, 4)] = T2n - T2u;
Chris@82 293 Ip[WS(rs, 1)] = T2n + T2u;
Chris@82 294 }
Chris@82 295 }
Chris@82 296 }
Chris@82 297 }
Chris@82 298 }
Chris@82 299
/*
 * Twiddle instruction list handed to the planner through desc (FMA branch).
 * NOTE(review): TW_FULL/TW_NEXT are defined outside this file; presumably
 * {TW_FULL, 1, 12} requests the full twiddle set for size 12 and TW_NEXT
 * terminates the list -- confirm against the tw_instr definition.  The
 * kernel reads 22 twiddle values (W[0]..W[21]) per iteration.
 */
Chris@82 300 static const tw_instr twinstr[] = {
Chris@82 301 {TW_FULL, 1, 12},
Chris@82 302 {TW_NEXT, 1, 0}
Chris@82 303 };
Chris@82 304
/*
 * Codelet descriptor (FMA branch): size 12, name "hc2cf_12", the twiddle
 * instruction list, &GENUS, and an operation-count record.  The counts
 * {72, 22, 46, 0} match this branch's header comment: 72 additions,
 * 22 multiplications, 46 fused multiply/adds (meaning of the trailing 0 is
 * not shown in this file).
 */
Chris@82 305 static const hc2c_desc desc = { 12, "hc2cf_12", twinstr, &GENUS, {72, 22, 46, 0} };
Chris@82 306
/*
 * Registration entry point (FMA branch): hands the hc2cf_12 kernel and its
 * descriptor to planner p via X(khc2c_register), tagged HC2C_VIA_RDFT.
 * X() is a macro defined outside this file (presumably a symbol-name
 * prefixing helper -- confirm).
 */
Chris@82 307 void X(codelet_hc2cf_12) (planner *p) {
Chris@82 308 X(khc2c_register) (p, hc2cf_12, &desc, HC2C_VIA_RDFT);
Chris@82 309 }
Chris@82 310 #else
Chris@82 311
Chris@82 312 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 12 -dit -name hc2cf_12 -include rdft/scalar/hc2cf.h */
Chris@82 313
Chris@82 314 /*
Chris@82 315 * This function contains 118 FP additions, 60 FP multiplications,
Chris@82 316 * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 317 * 47 stack variables, 2 constants, and 48 memory accesses
Chris@82 318 */
Chris@82 319 #include "rdft/scalar/hc2cf.h"
Chris@82 320
/*
 * hc2cf_12 -- variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined (the #else branch).  Machine-generated
 * size-12 codelet (generator flags recorded in this file: -n 12 -dit, no -fma).
 *
 * Parameters, as used by the body:
 *   Rp, Ip - arrays that are read and then overwritten; each pointer is
 *            advanced by +ms after every iteration.
 *   Rm, Im - arrays that are read and then overwritten; each pointer is
 *            advanced by -ms after every iteration.
 *   W      - twiddle table.  It is offset by (mb - 1) * 22 before the first
 *            iteration and advanced by 22 afterwards; W[0]..W[21] are read
 *            on every pass.
 *   rs     - stride passed to WS(rs, k) for k = 1..5 (WS is defined outside
 *            this file; presumably it yields k * rs -- TODO confirm).
 *   mb, me - half-open iteration range: m starts at mb and runs while m < me.
 *   ms     - per-iteration pointer step (+ms for Rp/Ip, -ms for Rm/Im).
 *
 * Per iteration:
 *   1. Rp[0] and Rm[0] are loaded unscaled.  Every other pair
 *      (Rp[k], Rm[k]), k = 1..5, is combined with (W[4k-2], W[4k-1]) and
 *      every pair (Ip[k], Im[k]), k = 0..5, with (W[4k], W[4k+1]).  For an
 *      input pair (a, b) and twiddles (w0, w1) the code forms
 *      FMA(w0, a, w1 * b) and FNMS(w1, a, w0 * b).
 *      NOTE(review): FMA/FNMS come from the codelet headers; presumably
 *      FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm there.
 *   2. Each of the four load groups pre-computes sums, 0.5-scaled
 *      combinations and KP866025403-scaled differences
 *      (KP866025403 is numerically sqrt(3)/2).
 *   3. Three output groups store 8 values each, covering exactly the 24
 *      Rp/Ip/Rm/Im locations that were read, so the transform is in place.
 */
Chris@82 321 static void hc2cf_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 322 {
Chris@82 323 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 324 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 325 {
Chris@82 326 INT m;
/* One iteration per m in [mb, me); W consumes 22 entries per iteration. */
Chris@82 327 for (m = mb, W = W + ((mb - 1) * 22); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 22, MAKE_VOLATILE_STRIDE(48, rs)) {
Chris@82 328 E T1, T1W, T18, T22, Tc, T15, T1V, T23, TR, T1E, T1o, T1D, T12, T1l, T1F;
Chris@82 329 E T1G, Ti, T1S, T1d, T25, Tt, T1a, T1T, T26, TA, T1y, T1j, T1B, TL, T1g;
Chris@82 330 E T1z, T1A;
/* Load group 1: unscaled Rp[0]/Rm[0], plus (Rp, Rm)[2] with W[6], W[7]
 * and (Rp, Rm)[4] with W[14], W[15]. */
Chris@82 331 {
Chris@82 332 E T6, T16, Tb, T17;
Chris@82 333 T1 = Rp[0];
Chris@82 334 T1W = Rm[0];
Chris@82 335 {
Chris@82 336 E T3, T5, T2, T4;
Chris@82 337 T3 = Rp[WS(rs, 2)];
Chris@82 338 T5 = Rm[WS(rs, 2)];
Chris@82 339 T2 = W[6];
Chris@82 340 T4 = W[7];
Chris@82 341 T6 = FMA(T2, T3, T4 * T5);
Chris@82 342 T16 = FNMS(T4, T3, T2 * T5);
Chris@82 343 }
Chris@82 344 {
Chris@82 345 E T8, Ta, T7, T9;
Chris@82 346 T8 = Rp[WS(rs, 4)];
Chris@82 347 Ta = Rm[WS(rs, 4)];
Chris@82 348 T7 = W[14];
Chris@82 349 T9 = W[15];
Chris@82 350 Tb = FMA(T7, T8, T9 * Ta);
Chris@82 351 T17 = FNMS(T9, T8, T7 * Ta);
Chris@82 352 }
Chris@82 353 T18 = KP866025403 * (T16 - T17);
Chris@82 354 T22 = KP866025403 * (Tb - T6);
Chris@82 355 Tc = T6 + Tb;
Chris@82 356 T15 = FNMS(KP500000000, Tc, T1);
Chris@82 357 T1V = T16 + T17;
Chris@82 358 T23 = FNMS(KP500000000, T1V, T1W);
Chris@82 359 }
/* Load group 2: (Ip, Im)[4] with W[16], W[17]; (Ip, Im)[2] with W[8], W[9];
 * (Ip, Im)[0] with W[0], W[1]. */
Chris@82 360 {
Chris@82 361 E T11, T1n, TW, T1m;
Chris@82 362 {
Chris@82 363 E TO, TQ, TN, TP;
Chris@82 364 TO = Ip[WS(rs, 4)];
Chris@82 365 TQ = Im[WS(rs, 4)];
Chris@82 366 TN = W[16];
Chris@82 367 TP = W[17];
Chris@82 368 TR = FMA(TN, TO, TP * TQ);
Chris@82 369 T1E = FNMS(TP, TO, TN * TQ);
Chris@82 370 }
Chris@82 371 {
Chris@82 372 E TY, T10, TX, TZ;
Chris@82 373 TY = Ip[WS(rs, 2)];
Chris@82 374 T10 = Im[WS(rs, 2)];
Chris@82 375 TX = W[8];
Chris@82 376 TZ = W[9];
Chris@82 377 T11 = FMA(TX, TY, TZ * T10);
Chris@82 378 T1n = FNMS(TZ, TY, TX * T10);
Chris@82 379 }
Chris@82 380 {
Chris@82 381 E TT, TV, TS, TU;
Chris@82 382 TT = Ip[0];
Chris@82 383 TV = Im[0];
Chris@82 384 TS = W[0];
Chris@82 385 TU = W[1];
Chris@82 386 TW = FMA(TS, TT, TU * TV);
Chris@82 387 T1m = FNMS(TU, TT, TS * TV);
Chris@82 388 }
Chris@82 389 T1o = KP866025403 * (T1m - T1n);
Chris@82 390 T1D = KP866025403 * (T11 - TW);
Chris@82 391 T12 = TW + T11;
Chris@82 392 T1l = FNMS(KP500000000, T12, TR);
Chris@82 393 T1F = T1m + T1n;
Chris@82 394 T1G = FNMS(KP500000000, T1F, T1E);
Chris@82 395 }
/* Load group 3: (Rp, Rm)[3] with W[10], W[11]; (Rp, Rm)[1] with W[2], W[3];
 * (Rp, Rm)[5] with W[18], W[19]. */
Chris@82 396 {
Chris@82 397 E Ts, T1c, Tn, T1b;
Chris@82 398 {
Chris@82 399 E Tf, Th, Te, Tg;
Chris@82 400 Tf = Rp[WS(rs, 3)];
Chris@82 401 Th = Rm[WS(rs, 3)];
Chris@82 402 Te = W[10];
Chris@82 403 Tg = W[11];
Chris@82 404 Ti = FMA(Te, Tf, Tg * Th);
Chris@82 405 T1S = FNMS(Tg, Tf, Te * Th);
Chris@82 406 }
Chris@82 407 {
Chris@82 408 E Tp, Tr, To, Tq;
Chris@82 409 Tp = Rp[WS(rs, 1)];
Chris@82 410 Tr = Rm[WS(rs, 1)];
Chris@82 411 To = W[2];
Chris@82 412 Tq = W[3];
Chris@82 413 Ts = FMA(To, Tp, Tq * Tr);
Chris@82 414 T1c = FNMS(Tq, Tp, To * Tr);
Chris@82 415 }
Chris@82 416 {
Chris@82 417 E Tk, Tm, Tj, Tl;
Chris@82 418 Tk = Rp[WS(rs, 5)];
Chris@82 419 Tm = Rm[WS(rs, 5)];
Chris@82 420 Tj = W[18];
Chris@82 421 Tl = W[19];
Chris@82 422 Tn = FMA(Tj, Tk, Tl * Tm);
Chris@82 423 T1b = FNMS(Tl, Tk, Tj * Tm);
Chris@82 424 }
Chris@82 425 T1d = KP866025403 * (T1b - T1c);
Chris@82 426 T25 = KP866025403 * (Ts - Tn);
Chris@82 427 Tt = Tn + Ts;
Chris@82 428 T1a = FNMS(KP500000000, Tt, Ti);
Chris@82 429 T1T = T1b + T1c;
Chris@82 430 T26 = FNMS(KP500000000, T1T, T1S);
Chris@82 431 }
/* Load group 4: (Ip, Im)[1] with W[4], W[5]; (Ip, Im)[5] with W[20], W[21];
 * (Ip, Im)[3] with W[12], W[13]. */
Chris@82 432 {
Chris@82 433 E TK, T1i, TF, T1h;
Chris@82 434 {
Chris@82 435 E Tx, Tz, Tw, Ty;
Chris@82 436 Tx = Ip[WS(rs, 1)];
Chris@82 437 Tz = Im[WS(rs, 1)];
Chris@82 438 Tw = W[4];
Chris@82 439 Ty = W[5];
Chris@82 440 TA = FMA(Tw, Tx, Ty * Tz);
Chris@82 441 T1y = FNMS(Ty, Tx, Tw * Tz);
Chris@82 442 }
Chris@82 443 {
Chris@82 444 E TH, TJ, TG, TI;
Chris@82 445 TH = Ip[WS(rs, 5)];
Chris@82 446 TJ = Im[WS(rs, 5)];
Chris@82 447 TG = W[20];
Chris@82 448 TI = W[21];
Chris@82 449 TK = FMA(TG, TH, TI * TJ);
Chris@82 450 T1i = FNMS(TI, TH, TG * TJ);
Chris@82 451 }
Chris@82 452 {
Chris@82 453 E TC, TE, TB, TD;
Chris@82 454 TC = Ip[WS(rs, 3)];
Chris@82 455 TE = Im[WS(rs, 3)];
Chris@82 456 TB = W[12];
Chris@82 457 TD = W[13];
Chris@82 458 TF = FMA(TB, TC, TD * TE);
Chris@82 459 T1h = FNMS(TD, TC, TB * TE);
Chris@82 460 }
Chris@82 461 T1j = KP866025403 * (T1h - T1i);
Chris@82 462 T1B = KP866025403 * (TK - TF);
Chris@82 463 TL = TF + TK;
Chris@82 464 T1g = FNMS(KP500000000, TL, TA);
Chris@82 465 T1z = T1h + T1i;
Chris@82 466 T1A = FNMS(KP500000000, T1z, T1y);
Chris@82 467 }
/* Output group 1: built from the plain sums only (no KP-scaled terms). */
Chris@82 468 {
Chris@82 469 E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R;
Chris@82 470 {
Chris@82 471 E Td, Tu, T1U, T1X;
Chris@82 472 Td = T1 + Tc;
Chris@82 473 Tu = Ti + Tt;
Chris@82 474 Tv = Td + Tu;
Chris@82 475 T1N = Td - Tu;
Chris@82 476 T1U = T1S + T1T;
Chris@82 477 T1X = T1V + T1W;
Chris@82 478 T1Y = T1U + T1X;
Chris@82 479 T20 = T1X - T1U;
Chris@82 480 }
Chris@82 481 {
Chris@82 482 E TM, T13, T1O, T1P;
Chris@82 483 TM = TA + TL;
Chris@82 484 T13 = TR + T12;
Chris@82 485 T14 = TM + T13;
Chris@82 486 T1Z = TM - T13;
Chris@82 487 T1O = T1y + T1z;
Chris@82 488 T1P = T1E + T1F;
Chris@82 489 T1Q = T1O - T1P;
Chris@82 490 T1R = T1O + T1P;
Chris@82 491 }
Chris@82 492 Rm[WS(rs, 5)] = Tv - T14;
Chris@82 493 Im[WS(rs, 5)] = T1R - T1Y;
Chris@82 494 Rp[0] = Tv + T14;
Chris@82 495 Ip[0] = T1R + T1Y;
Chris@82 496 Rp[WS(rs, 3)] = T1N - T1Q;
Chris@82 497 Ip[WS(rs, 3)] = T1Z + T20;
Chris@82 498 Rm[WS(rs, 2)] = T1N + T1Q;
Chris@82 499 Im[WS(rs, 2)] = T1Z - T20;
Chris@82 500 }
/* Output group 2: uses the "0.5-term plus KP866025403-term" combinations. */
Chris@82 501 {
Chris@82 502 E T1t, T1J, T28, T2a, T1w, T21, T1M, T29;
Chris@82 503 {
Chris@82 504 E T1r, T1s, T24, T27;
Chris@82 505 T1r = T15 + T18;
Chris@82 506 T1s = T1a + T1d;
Chris@82 507 T1t = T1r + T1s;
Chris@82 508 T1J = T1r - T1s;
Chris@82 509 T24 = T22 + T23;
Chris@82 510 T27 = T25 + T26;
Chris@82 511 T28 = T24 - T27;
Chris@82 512 T2a = T27 + T24;
Chris@82 513 }
Chris@82 514 {
Chris@82 515 E T1u, T1v, T1K, T1L;
Chris@82 516 T1u = T1g + T1j;
Chris@82 517 T1v = T1l + T1o;
Chris@82 518 T1w = T1u + T1v;
Chris@82 519 T21 = T1v - T1u;
Chris@82 520 T1K = T1B + T1A;
Chris@82 521 T1L = T1D + T1G;
Chris@82 522 T1M = T1K - T1L;
Chris@82 523 T29 = T1K + T1L;
Chris@82 524 }
Chris@82 525 Rm[WS(rs, 1)] = T1t - T1w;
Chris@82 526 Im[WS(rs, 1)] = T29 - T2a;
Chris@82 527 Rp[WS(rs, 4)] = T1t + T1w;
Chris@82 528 Ip[WS(rs, 4)] = T29 + T2a;
Chris@82 529 Rm[WS(rs, 4)] = T1J - T1M;
Chris@82 530 Im[WS(rs, 4)] = T21 - T28;
Chris@82 531 Rp[WS(rs, 1)] = T1J + T1M;
Chris@82 532 Ip[WS(rs, 1)] = T21 + T28;
Chris@82 533 }
/* Output group 3: uses the "0.5-term minus KP866025403-term" combinations. */
Chris@82 534 {
Chris@82 535 E T1f, T1x, T2e, T2g, T1q, T2f, T1I, T2b;
Chris@82 536 {
Chris@82 537 E T19, T1e, T2c, T2d;
Chris@82 538 T19 = T15 - T18;
Chris@82 539 T1e = T1a - T1d;
Chris@82 540 T1f = T19 + T1e;
Chris@82 541 T1x = T19 - T1e;
Chris@82 542 T2c = T26 - T25;
Chris@82 543 T2d = T23 - T22;
Chris@82 544 T2e = T2c + T2d;
Chris@82 545 T2g = T2d - T2c;
Chris@82 546 }
Chris@82 547 {
Chris@82 548 E T1k, T1p, T1C, T1H;
Chris@82 549 T1k = T1g - T1j;
Chris@82 550 T1p = T1l - T1o;
Chris@82 551 T1q = T1k + T1p;
Chris@82 552 T2f = T1p - T1k;
Chris@82 553 T1C = T1A - T1B;
Chris@82 554 T1H = T1D - T1G;
Chris@82 555 T1I = T1C + T1H;
Chris@82 556 T2b = T1H - T1C;
Chris@82 557 }
Chris@82 558 Rp[WS(rs, 2)] = T1f - T1q;
Chris@82 559 Ip[WS(rs, 2)] = T2b + T2e;
Chris@82 560 Rm[WS(rs, 3)] = T1f + T1q;
Chris@82 561 Im[WS(rs, 3)] = T2b - T2e;
Chris@82 562 Rm[0] = T1x - T1I;
Chris@82 563 Im[0] = T2f - T2g;
Chris@82 564 Rp[WS(rs, 5)] = T1x + T1I;
Chris@82 565 Ip[WS(rs, 5)] = T2f + T2g;
Chris@82 566 }
Chris@82 567 }
Chris@82 568 }
Chris@82 569 }
Chris@82 570
/*
 * Twiddle instruction list handed to the planner through desc (non-FMA
 * branch).
 * NOTE(review): TW_FULL/TW_NEXT are defined outside this file; presumably
 * {TW_FULL, 1, 12} requests the full twiddle set for size 12 and TW_NEXT
 * terminates the list -- confirm against the tw_instr definition.  The
 * kernel reads 22 twiddle values (W[0]..W[21]) per iteration.
 */
Chris@82 571 static const tw_instr twinstr[] = {
Chris@82 572 {TW_FULL, 1, 12},
Chris@82 573 {TW_NEXT, 1, 0}
Chris@82 574 };
Chris@82 575
/*
 * Codelet descriptor (non-FMA branch): size 12, name "hc2cf_12", the twiddle
 * instruction list, &GENUS, and an operation-count record.  The counts
 * {88, 30, 30, 0} match this branch's header comment: 88 additions,
 * 30 multiplications, 30 fused multiply/adds (meaning of the trailing 0 is
 * not shown in this file).
 */
Chris@82 576 static const hc2c_desc desc = { 12, "hc2cf_12", twinstr, &GENUS, {88, 30, 30, 0} };
Chris@82 577
/*
 * Registration entry point (non-FMA branch): hands the hc2cf_12 kernel and
 * its descriptor to planner p via X(khc2c_register), tagged HC2C_VIA_RDFT.
 * X() is a macro defined outside this file (presumably a symbol-name
 * prefixing helper -- confirm).
 */
Chris@82 578 void X(codelet_hc2cf_12) (planner *p) {
Chris@82 579 X(khc2c_register) (p, hc2cf_12, &desc, HC2C_VIA_RDFT);
Chris@82 580 }
Chris@82 581 #endif