annotate src/fftw-3.3.8/rdft/scalar/r2cf/hc2cf_20.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:08 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cf_20 -include rdft/scalar/hc2cf.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 246 FP additions, 148 FP multiplications,
Chris@82 32 * (or, 136 additions, 38 multiplications, 110 fused multiply/add),
Chris@82 33 * 61 stack variables, 4 constants, and 80 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cf.h"
Chris@82 36
/*
 * hc2cf_20, FMA-preferring variant (generated by gen_hc2c with -n 20 -dit).
 *
 * For every m in [mb, me) the loop body reads ten elements (indices 0..9,
 * addressed through WS(rs, k)) from each of Rp, Ip, Rm and Im, scales all of
 * them except Rp[0] and Rm[0] by coefficient pairs taken from W, combines the
 * results, and stores ten values back into each of the four arrays.  All
 * loads of an iteration happen before its first store.
 *
 * Rp, Ip : advanced by +ms after each iteration.
 * Rm, Im : advanced by -ms after each iteration.
 * W      : coefficient table; 38 entries (19 pairs) are used per iteration
 *          and the pointer is pre-offset by (mb - 1) * 38 before the loop.
 *          The pair (Rp[k], Rm[k]), k = 1..9, uses W[4k-2] and W[4k-1];
 *          the pair (Ip[k], Im[k]), k = 0..9, uses W[4k] and W[4k+1].
 * rs     : stride passed to WS() to locate element k.
 * mb, me : half-open iteration range.
 * ms     : per-iteration pointer step.
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE
 * are defined in the included FFTW headers, which are not visible here.  The
 * comments below assume FMA(a,b,c) = a*b + c, FNMS(a,b,c) = c - a*b and
 * FMS(a,b,c) = a*b - c -- confirm against the headers.
 */
Chris@82 37 static void hc2cf_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numerically: KP951056516 ~ sin(2*pi/5), KP559016994 ~ sqrt(5)/4,
 * KP250000000 = 1/4, KP618033988 ~ (sqrt(5) - 1)/2. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
/* One pass per m: Rp/Ip walk forwards, Rm/Im walk backwards, and W moves on
 * by the 38 coefficients used in the body. */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) {
Chris@82 46 E T8, T4N, T2i, T4r, Tl, T4O, T2n, T4n, TN, T2b, T3T, T4f, T2v, T3v, T3p;
Chris@82 47 E T3F, T27, T2f, T43, T4b, T2R, T3z, T33, T3J, T1G, T2e, T40, T4c, T2K, T3y;
Chris@82 48 E T3a, T3I, T1e, T2c, T3W, T4e, T2C, T3w, T3i, T3G;
/* Rp[0] and Rm[0] are used unscaled; (Rp[5], Rm[5]) is scaled by
 * (W[18], W[19]).  Sums and differences of the two are kept. */
Chris@82 49 {
Chris@82 50 E T1, T4q, T3, T6, T4, T4o, T2, T7, T4p, T5;
Chris@82 51 T1 = Rp[0];
Chris@82 52 T4q = Rm[0];
Chris@82 53 T3 = Rp[WS(rs, 5)];
Chris@82 54 T6 = Rm[WS(rs, 5)];
Chris@82 55 T2 = W[18];
Chris@82 56 T4 = T2 * T3;
Chris@82 57 T4o = T2 * T6;
Chris@82 58 T5 = W[19];
Chris@82 59 T7 = FMA(T5, T6, T4);
Chris@82 60 T4p = FNMS(T5, T3, T4o);
Chris@82 61 T8 = T1 + T7;
Chris@82 62 T4N = T4q - T4p;
Chris@82 63 T2i = T1 - T7;
Chris@82 64 T4r = T4p + T4q;
Chris@82 65 }
/* (Ip[2], Im[2]) scaled by (W[8], W[9]) and (Ip[7], Im[7]) scaled by
 * (W[28], W[29]), followed by their sums and differences. */
Chris@82 66 {
Chris@82 67 E Ta, Td, Tb, T2j, Tg, Tj, Th, T2l, T9, Tf;
Chris@82 68 Ta = Ip[WS(rs, 2)];
Chris@82 69 Td = Im[WS(rs, 2)];
Chris@82 70 T9 = W[8];
Chris@82 71 Tb = T9 * Ta;
Chris@82 72 T2j = T9 * Td;
Chris@82 73 Tg = Ip[WS(rs, 7)];
Chris@82 74 Tj = Im[WS(rs, 7)];
Chris@82 75 Tf = W[28];
Chris@82 76 Th = Tf * Tg;
Chris@82 77 T2l = Tf * Tj;
Chris@82 78 {
Chris@82 79 E Te, T2k, Tk, T2m, Tc, Ti;
Chris@82 80 Tc = W[9];
Chris@82 81 Te = FMA(Tc, Td, Tb);
Chris@82 82 T2k = FNMS(Tc, Ta, T2j);
Chris@82 83 Ti = W[29];
Chris@82 84 Tk = FMA(Ti, Tj, Th);
Chris@82 85 T2m = FNMS(Ti, Tg, T2l);
Chris@82 86 Tl = Te + Tk;
Chris@82 87 T4O = Te - Tk;
Chris@82 88 T2n = T2k - T2m;
Chris@82 89 T4n = T2k + T2m;
Chris@82 90 }
Chris@82 91 }
/* Scaled pairs at Rp/Rm[2], Ip/Im[9], Rp/Rm[7] and Ip/Im[4], then the sums
 * and differences that feed the output stage. */
Chris@82 92 {
Chris@82 93 E Ts, T3l, TL, T2t, Ty, T3n, TF, T2r;
Chris@82 94 {
Chris@82 95 E To, Tr, Tp, T3k, Tn, Tq;
Chris@82 96 To = Rp[WS(rs, 2)];
Chris@82 97 Tr = Rm[WS(rs, 2)];
Chris@82 98 Tn = W[6];
Chris@82 99 Tp = Tn * To;
Chris@82 100 T3k = Tn * Tr;
Chris@82 101 Tq = W[7];
Chris@82 102 Ts = FMA(Tq, Tr, Tp);
Chris@82 103 T3l = FNMS(Tq, To, T3k);
Chris@82 104 }
Chris@82 105 {
Chris@82 106 E TH, TK, TI, T2s, TG, TJ;
Chris@82 107 TH = Ip[WS(rs, 9)];
Chris@82 108 TK = Im[WS(rs, 9)];
Chris@82 109 TG = W[36];
Chris@82 110 TI = TG * TH;
Chris@82 111 T2s = TG * TK;
Chris@82 112 TJ = W[37];
Chris@82 113 TL = FMA(TJ, TK, TI);
Chris@82 114 T2t = FNMS(TJ, TH, T2s);
Chris@82 115 }
Chris@82 116 {
Chris@82 117 E Tu, Tx, Tv, T3m, Tt, Tw;
Chris@82 118 Tu = Rp[WS(rs, 7)];
Chris@82 119 Tx = Rm[WS(rs, 7)];
Chris@82 120 Tt = W[26];
Chris@82 121 Tv = Tt * Tu;
Chris@82 122 T3m = Tt * Tx;
Chris@82 123 Tw = W[27];
Chris@82 124 Ty = FMA(Tw, Tx, Tv);
Chris@82 125 T3n = FNMS(Tw, Tu, T3m);
Chris@82 126 }
Chris@82 127 {
Chris@82 128 E TB, TE, TC, T2q, TA, TD;
Chris@82 129 TB = Ip[WS(rs, 4)];
Chris@82 130 TE = Im[WS(rs, 4)];
Chris@82 131 TA = W[16];
Chris@82 132 TC = TA * TB;
Chris@82 133 T2q = TA * TE;
Chris@82 134 TD = W[17];
Chris@82 135 TF = FMA(TD, TE, TC);
Chris@82 136 T2r = FNMS(TD, TB, T2q);
Chris@82 137 }
Chris@82 138 {
Chris@82 139 E Tz, TM, T3R, T3S;
Chris@82 140 Tz = Ts + Ty;
Chris@82 141 TM = TF + TL;
Chris@82 142 TN = Tz - TM;
Chris@82 143 T2b = Tz + TM;
Chris@82 144 T3R = T3l + T3n;
Chris@82 145 T3S = T2r + T2t;
Chris@82 146 T3T = T3R + T3S;
Chris@82 147 T4f = T3S - T3R;
Chris@82 148 }
Chris@82 149 {
Chris@82 150 E T2p, T2u, T3j, T3o;
Chris@82 151 T2p = Ts - Ty;
Chris@82 152 T2u = T2r - T2t;
Chris@82 153 T2v = T2p - T2u;
Chris@82 154 T3v = T2p + T2u;
Chris@82 155 T3j = TL - TF;
Chris@82 156 T3o = T3l - T3n;
Chris@82 157 T3p = T3j - T3o;
Chris@82 158 T3F = T3o + T3j;
Chris@82 159 }
Chris@82 160 }
/* Same pattern for Rp/Rm[6], Ip/Im[3], Rp/Rm[1] and Ip/Im[8]. */
Chris@82 161 {
Chris@82 162 E T1M, T2Z, T25, T2P, T1S, T31, T1Z, T2N;
Chris@82 163 {
Chris@82 164 E T1I, T1L, T1J, T2Y, T1H, T1K;
Chris@82 165 T1I = Rp[WS(rs, 6)];
Chris@82 166 T1L = Rm[WS(rs, 6)];
Chris@82 167 T1H = W[22];
Chris@82 168 T1J = T1H * T1I;
Chris@82 169 T2Y = T1H * T1L;
Chris@82 170 T1K = W[23];
Chris@82 171 T1M = FMA(T1K, T1L, T1J);
Chris@82 172 T2Z = FNMS(T1K, T1I, T2Y);
Chris@82 173 }
Chris@82 174 {
Chris@82 175 E T21, T24, T22, T2O, T20, T23;
Chris@82 176 T21 = Ip[WS(rs, 3)];
Chris@82 177 T24 = Im[WS(rs, 3)];
Chris@82 178 T20 = W[12];
Chris@82 179 T22 = T20 * T21;
Chris@82 180 T2O = T20 * T24;
Chris@82 181 T23 = W[13];
Chris@82 182 T25 = FMA(T23, T24, T22);
Chris@82 183 T2P = FNMS(T23, T21, T2O);
Chris@82 184 }
Chris@82 185 {
Chris@82 186 E T1O, T1R, T1P, T30, T1N, T1Q;
Chris@82 187 T1O = Rp[WS(rs, 1)];
Chris@82 188 T1R = Rm[WS(rs, 1)];
Chris@82 189 T1N = W[2];
Chris@82 190 T1P = T1N * T1O;
Chris@82 191 T30 = T1N * T1R;
Chris@82 192 T1Q = W[3];
Chris@82 193 T1S = FMA(T1Q, T1R, T1P);
Chris@82 194 T31 = FNMS(T1Q, T1O, T30);
Chris@82 195 }
Chris@82 196 {
Chris@82 197 E T1V, T1Y, T1W, T2M, T1U, T1X;
Chris@82 198 T1V = Ip[WS(rs, 8)];
Chris@82 199 T1Y = Im[WS(rs, 8)];
Chris@82 200 T1U = W[32];
Chris@82 201 T1W = T1U * T1V;
Chris@82 202 T2M = T1U * T1Y;
Chris@82 203 T1X = W[33];
Chris@82 204 T1Z = FMA(T1X, T1Y, T1W);
Chris@82 205 T2N = FNMS(T1X, T1V, T2M);
Chris@82 206 }
Chris@82 207 {
Chris@82 208 E T1T, T26, T41, T42;
Chris@82 209 T1T = T1M + T1S;
Chris@82 210 T26 = T1Z + T25;
Chris@82 211 T27 = T1T - T26;
Chris@82 212 T2f = T1T + T26;
Chris@82 213 T41 = T2Z + T31;
Chris@82 214 T42 = T2N + T2P;
Chris@82 215 T43 = T41 + T42;
Chris@82 216 T4b = T42 - T41;
Chris@82 217 }
Chris@82 218 {
Chris@82 219 E T2L, T2Q, T2X, T32;
Chris@82 220 T2L = T1M - T1S;
Chris@82 221 T2Q = T2N - T2P;
Chris@82 222 T2R = T2L - T2Q;
Chris@82 223 T3z = T2L + T2Q;
Chris@82 224 T2X = T25 - T1Z;
Chris@82 225 T32 = T2Z - T31;
Chris@82 226 T33 = T2X - T32;
Chris@82 227 T3J = T32 + T2X;
Chris@82 228 }
Chris@82 229 }
/* Same pattern for Rp/Rm[4], Ip/Im[1], Rp/Rm[9] and Ip/Im[6]. */
Chris@82 230 {
Chris@82 231 E T1l, T36, T1E, T2I, T1r, T38, T1y, T2G;
Chris@82 232 {
Chris@82 233 E T1h, T1k, T1i, T35, T1g, T1j;
Chris@82 234 T1h = Rp[WS(rs, 4)];
Chris@82 235 T1k = Rm[WS(rs, 4)];
Chris@82 236 T1g = W[14];
Chris@82 237 T1i = T1g * T1h;
Chris@82 238 T35 = T1g * T1k;
Chris@82 239 T1j = W[15];
Chris@82 240 T1l = FMA(T1j, T1k, T1i);
Chris@82 241 T36 = FNMS(T1j, T1h, T35);
Chris@82 242 }
Chris@82 243 {
Chris@82 244 E T1A, T1D, T1B, T2H, T1z, T1C;
Chris@82 245 T1A = Ip[WS(rs, 1)];
Chris@82 246 T1D = Im[WS(rs, 1)];
Chris@82 247 T1z = W[4];
Chris@82 248 T1B = T1z * T1A;
Chris@82 249 T2H = T1z * T1D;
Chris@82 250 T1C = W[5];
Chris@82 251 T1E = FMA(T1C, T1D, T1B);
Chris@82 252 T2I = FNMS(T1C, T1A, T2H);
Chris@82 253 }
Chris@82 254 {
Chris@82 255 E T1n, T1q, T1o, T37, T1m, T1p;
Chris@82 256 T1n = Rp[WS(rs, 9)];
Chris@82 257 T1q = Rm[WS(rs, 9)];
Chris@82 258 T1m = W[34];
Chris@82 259 T1o = T1m * T1n;
Chris@82 260 T37 = T1m * T1q;
Chris@82 261 T1p = W[35];
Chris@82 262 T1r = FMA(T1p, T1q, T1o);
Chris@82 263 T38 = FNMS(T1p, T1n, T37);
Chris@82 264 }
Chris@82 265 {
Chris@82 266 E T1u, T1x, T1v, T2F, T1t, T1w;
Chris@82 267 T1u = Ip[WS(rs, 6)];
Chris@82 268 T1x = Im[WS(rs, 6)];
Chris@82 269 T1t = W[24];
Chris@82 270 T1v = T1t * T1u;
Chris@82 271 T2F = T1t * T1x;
Chris@82 272 T1w = W[25];
Chris@82 273 T1y = FMA(T1w, T1x, T1v);
Chris@82 274 T2G = FNMS(T1w, T1u, T2F);
Chris@82 275 }
Chris@82 276 {
Chris@82 277 E T1s, T1F, T3Y, T3Z;
Chris@82 278 T1s = T1l + T1r;
Chris@82 279 T1F = T1y + T1E;
Chris@82 280 T1G = T1s - T1F;
Chris@82 281 T2e = T1s + T1F;
Chris@82 282 T3Y = T36 + T38;
Chris@82 283 T3Z = T2G + T2I;
Chris@82 284 T40 = T3Y + T3Z;
Chris@82 285 T4c = T3Z - T3Y;
Chris@82 286 }
Chris@82 287 {
Chris@82 288 E T2E, T2J, T34, T39;
Chris@82 289 T2E = T1l - T1r;
Chris@82 290 T2J = T2G - T2I;
Chris@82 291 T2K = T2E - T2J;
Chris@82 292 T3y = T2E + T2J;
Chris@82 293 T34 = T1E - T1y;
Chris@82 294 T39 = T36 - T38;
Chris@82 295 T3a = T34 - T39;
Chris@82 296 T3I = T39 + T34;
Chris@82 297 }
Chris@82 298 }
/* Same pattern for Rp/Rm[8], Ip/Im[5], Rp/Rm[3] and Ip/Im[0]. */
Chris@82 299 {
Chris@82 300 E TT, T3e, T1c, T2A, TZ, T3g, T16, T2y;
Chris@82 301 {
Chris@82 302 E TP, TS, TQ, T3d, TO, TR;
Chris@82 303 TP = Rp[WS(rs, 8)];
Chris@82 304 TS = Rm[WS(rs, 8)];
Chris@82 305 TO = W[30];
Chris@82 306 TQ = TO * TP;
Chris@82 307 T3d = TO * TS;
Chris@82 308 TR = W[31];
Chris@82 309 TT = FMA(TR, TS, TQ);
Chris@82 310 T3e = FNMS(TR, TP, T3d);
Chris@82 311 }
Chris@82 312 {
Chris@82 313 E T18, T1b, T19, T2z, T17, T1a;
Chris@82 314 T18 = Ip[WS(rs, 5)];
Chris@82 315 T1b = Im[WS(rs, 5)];
Chris@82 316 T17 = W[20];
Chris@82 317 T19 = T17 * T18;
Chris@82 318 T2z = T17 * T1b;
Chris@82 319 T1a = W[21];
Chris@82 320 T1c = FMA(T1a, T1b, T19);
Chris@82 321 T2A = FNMS(T1a, T18, T2z);
Chris@82 322 }
Chris@82 323 {
Chris@82 324 E TV, TY, TW, T3f, TU, TX;
Chris@82 325 TV = Rp[WS(rs, 3)];
Chris@82 326 TY = Rm[WS(rs, 3)];
Chris@82 327 TU = W[10];
Chris@82 328 TW = TU * TV;
Chris@82 329 T3f = TU * TY;
Chris@82 330 TX = W[11];
Chris@82 331 TZ = FMA(TX, TY, TW);
Chris@82 332 T3g = FNMS(TX, TV, T3f);
Chris@82 333 }
Chris@82 334 {
Chris@82 335 E T12, T15, T13, T2x, T11, T14;
Chris@82 336 T12 = Ip[0];
Chris@82 337 T15 = Im[0];
Chris@82 338 T11 = W[0];
Chris@82 339 T13 = T11 * T12;
Chris@82 340 T2x = T11 * T15;
Chris@82 341 T14 = W[1];
Chris@82 342 T16 = FMA(T14, T15, T13);
Chris@82 343 T2y = FNMS(T14, T12, T2x);
Chris@82 344 }
Chris@82 345 {
Chris@82 346 E T10, T1d, T3U, T3V;
Chris@82 347 T10 = TT + TZ;
Chris@82 348 T1d = T16 + T1c;
Chris@82 349 T1e = T10 - T1d;
Chris@82 350 T2c = T10 + T1d;
Chris@82 351 T3U = T3e + T3g;
Chris@82 352 T3V = T2y + T2A;
Chris@82 353 T3W = T3U + T3V;
Chris@82 354 T4e = T3V - T3U;
Chris@82 355 }
Chris@82 356 {
Chris@82 357 E T2w, T2B, T3c, T3h;
Chris@82 358 T2w = TT - TZ;
Chris@82 359 T2B = T2y - T2A;
Chris@82 360 T2C = T2w - T2B;
Chris@82 361 T3w = T2w + T2B;
Chris@82 362 T3c = T1c - T16;
Chris@82 363 T3h = T3e - T3g;
Chris@82 364 T3i = T3c - T3h;
Chris@82 365 T3G = T3h + T3c;
Chris@82 366 }
Chris@82 367 }
/* Output stage: each of the eight blocks below combines the intermediate
 * sums and differences with the KP constants and stores five results.
 * This block stores Rm[9], Rm[5], Rp[6], Rp[2] and Rm[1]. */
Chris@82 368 {
Chris@82 369 E T4h, T4j, Tm, T29, T48, T49, T4i, T4a;
Chris@82 370 {
Chris@82 371 E T4d, T4g, T1f, T28;
Chris@82 372 T4d = T4b - T4c;
Chris@82 373 T4g = T4e - T4f;
Chris@82 374 T4h = FNMS(KP618033988, T4g, T4d);
Chris@82 375 T4j = FMA(KP618033988, T4d, T4g);
Chris@82 376 Tm = T8 - Tl;
Chris@82 377 T1f = TN + T1e;
Chris@82 378 T28 = T1G + T27;
Chris@82 379 T29 = T1f + T28;
Chris@82 380 T48 = FNMS(KP250000000, T29, Tm);
Chris@82 381 T49 = T1f - T28;
Chris@82 382 }
Chris@82 383 Rm[WS(rs, 9)] = Tm + T29;
Chris@82 384 T4i = FMA(KP559016994, T49, T48);
Chris@82 385 Rm[WS(rs, 5)] = FNMS(KP951056516, T4j, T4i);
Chris@82 386 Rp[WS(rs, 6)] = FMA(KP951056516, T4j, T4i);
Chris@82 387 T4a = FNMS(KP559016994, T49, T48);
Chris@82 388 Rp[WS(rs, 2)] = FNMS(KP951056516, T4h, T4a);
Chris@82 389 Rm[WS(rs, 1)] = FMA(KP951056516, T4h, T4a);
Chris@82 390 }
/* Stores Im[9], Im[5], Ip[6], Im[1] and Ip[2]. */
Chris@82 391 {
Chris@82 392 E T4K, T4M, T4E, T4D, T4F, T4G, T4L, T4H;
Chris@82 393 {
Chris@82 394 E T4I, T4J, T4B, T4C;
Chris@82 395 T4I = T1G - T27;
Chris@82 396 T4J = T1e - TN;
Chris@82 397 T4K = FMA(KP618033988, T4J, T4I);
Chris@82 398 T4M = FNMS(KP618033988, T4I, T4J);
Chris@82 399 T4E = T4r - T4n;
Chris@82 400 T4B = T4f + T4e;
Chris@82 401 T4C = T4c + T4b;
Chris@82 402 T4D = T4B + T4C;
Chris@82 403 T4F = FMA(KP250000000, T4D, T4E);
Chris@82 404 T4G = T4C - T4B;
Chris@82 405 }
Chris@82 406 Im[WS(rs, 9)] = T4D - T4E;
Chris@82 407 T4L = FMA(KP559016994, T4G, T4F);
Chris@82 408 Im[WS(rs, 5)] = FMS(KP951056516, T4M, T4L);
Chris@82 409 Ip[WS(rs, 6)] = FMA(KP951056516, T4M, T4L);
Chris@82 410 T4H = FNMS(KP559016994, T4G, T4F);
Chris@82 411 Im[WS(rs, 1)] = FMS(KP951056516, T4K, T4H);
Chris@82 412 Ip[WS(rs, 2)] = FMA(KP951056516, T4K, T4H);
Chris@82 413 }
/* Stores Rp[0], Rm[7], Rp[8], Rp[4] and Rm[3]. */
Chris@82 414 {
Chris@82 415 E T45, T47, T2a, T2h, T3O, T3P, T46, T3Q;
Chris@82 416 {
Chris@82 417 E T3X, T44, T2d, T2g;
Chris@82 418 T3X = T3T - T3W;
Chris@82 419 T44 = T40 - T43;
Chris@82 420 T45 = FMA(KP618033988, T44, T3X);
Chris@82 421 T47 = FNMS(KP618033988, T3X, T44);
Chris@82 422 T2a = T8 + Tl;
Chris@82 423 T2d = T2b + T2c;
Chris@82 424 T2g = T2e + T2f;
Chris@82 425 T2h = T2d + T2g;
Chris@82 426 T3O = FNMS(KP250000000, T2h, T2a);
Chris@82 427 T3P = T2d - T2g;
Chris@82 428 }
Chris@82 429 Rp[0] = T2a + T2h;
Chris@82 430 T46 = FNMS(KP559016994, T3P, T3O);
Chris@82 431 Rm[WS(rs, 7)] = FNMS(KP951056516, T47, T46);
Chris@82 432 Rp[WS(rs, 8)] = FMA(KP951056516, T47, T46);
Chris@82 433 T3Q = FMA(KP559016994, T3P, T3O);
Chris@82 434 Rp[WS(rs, 4)] = FNMS(KP951056516, T45, T3Q);
Chris@82 435 Rm[WS(rs, 3)] = FMA(KP951056516, T45, T3Q);
Chris@82 436 }
/* Stores Ip[0], Im[7], Ip[8], Im[3] and Ip[4]. */
Chris@82 437 {
Chris@82 438 E T4y, T4A, T4s, T4m, T4t, T4u, T4z, T4v;
Chris@82 439 {
Chris@82 440 E T4w, T4x, T4k, T4l;
Chris@82 441 T4w = T2b - T2c;
Chris@82 442 T4x = T2f - T2e;
Chris@82 443 T4y = FNMS(KP618033988, T4x, T4w);
Chris@82 444 T4A = FMA(KP618033988, T4w, T4x);
Chris@82 445 T4s = T4n + T4r;
Chris@82 446 T4k = T3T + T3W;
Chris@82 447 T4l = T40 + T43;
Chris@82 448 T4m = T4k + T4l;
Chris@82 449 T4t = FNMS(KP250000000, T4m, T4s);
Chris@82 450 T4u = T4k - T4l;
Chris@82 451 }
Chris@82 452 Ip[0] = T4m + T4s;
Chris@82 453 T4z = FNMS(KP559016994, T4u, T4t);
Chris@82 454 Im[WS(rs, 7)] = FMS(KP951056516, T4A, T4z);
Chris@82 455 Ip[WS(rs, 8)] = FMA(KP951056516, T4A, T4z);
Chris@82 456 T4v = FMA(KP559016994, T4u, T4t);
Chris@82 457 Im[WS(rs, 3)] = FMS(KP951056516, T4y, T4v);
Chris@82 458 Ip[WS(rs, 4)] = FMA(KP951056516, T4y, T4v);
Chris@82 459 }
/* Stores Rm[4], Rm[8], Rm[0], Rp[3] and Rp[7]. */
Chris@82 460 {
Chris@82 461 E T3r, T3t, T2o, T2T, T2U, T2V, T3s, T2W;
Chris@82 462 {
Chris@82 463 E T3b, T3q, T2D, T2S;
Chris@82 464 T3b = T33 - T3a;
Chris@82 465 T3q = T3i - T3p;
Chris@82 466 T3r = FNMS(KP618033988, T3q, T3b);
Chris@82 467 T3t = FMA(KP618033988, T3b, T3q);
Chris@82 468 T2o = T2i - T2n;
Chris@82 469 T2D = T2v + T2C;
Chris@82 470 T2S = T2K + T2R;
Chris@82 471 T2T = T2D + T2S;
Chris@82 472 T2U = FNMS(KP250000000, T2T, T2o);
Chris@82 473 T2V = T2D - T2S;
Chris@82 474 }
Chris@82 475 Rm[WS(rs, 4)] = T2o + T2T;
Chris@82 476 T3s = FMA(KP559016994, T2V, T2U);
Chris@82 477 Rm[WS(rs, 8)] = FMA(KP951056516, T3t, T3s);
Chris@82 478 Rm[0] = FNMS(KP951056516, T3t, T3s);
Chris@82 479 T2W = FNMS(KP559016994, T2V, T2U);
Chris@82 480 Rp[WS(rs, 3)] = FMA(KP951056516, T3r, T2W);
Chris@82 481 Rp[WS(rs, 7)] = FNMS(KP951056516, T3r, T2W);
Chris@82 482 }
/* Stores Im[4], Ip[3], Ip[7], Im[8] and Im[0]. */
Chris@82 483 {
Chris@82 484 E T5a, T5c, T54, T53, T55, T56, T5b, T57;
Chris@82 485 {
Chris@82 486 E T58, T59, T51, T52;
Chris@82 487 T58 = T2v - T2C;
Chris@82 488 T59 = T2K - T2R;
Chris@82 489 T5a = FMA(KP618033988, T59, T58);
Chris@82 490 T5c = FNMS(KP618033988, T58, T59);
Chris@82 491 T54 = T4O + T4N;
Chris@82 492 T51 = T3p + T3i;
Chris@82 493 T52 = T3a + T33;
Chris@82 494 T53 = T51 + T52;
Chris@82 495 T55 = FMA(KP250000000, T53, T54);
Chris@82 496 T56 = T51 - T52;
Chris@82 497 }
Chris@82 498 Im[WS(rs, 4)] = T53 - T54;
Chris@82 499 T5b = FMA(KP559016994, T56, T55);
Chris@82 500 Ip[WS(rs, 3)] = FNMS(KP951056516, T5c, T5b);
Chris@82 501 Ip[WS(rs, 7)] = FMA(KP951056516, T5c, T5b);
Chris@82 502 T57 = FNMS(KP559016994, T56, T55);
Chris@82 503 Im[WS(rs, 8)] = FMS(KP951056516, T5a, T57);
Chris@82 504 Im[0] = -(FMA(KP951056516, T5a, T57));
Chris@82 505 }
/* Stores Rp[5], Rm[6], Rm[2], Rp[1] and Rp[9]. */
Chris@82 506 {
Chris@82 507 E T3L, T3N, T3u, T3B, T3C, T3D, T3M, T3E;
Chris@82 508 {
Chris@82 509 E T3H, T3K, T3x, T3A;
Chris@82 510 T3H = T3F - T3G;
Chris@82 511 T3K = T3I - T3J;
Chris@82 512 T3L = FMA(KP618033988, T3K, T3H);
Chris@82 513 T3N = FNMS(KP618033988, T3H, T3K);
Chris@82 514 T3u = T2i + T2n;
Chris@82 515 T3x = T3v + T3w;
Chris@82 516 T3A = T3y + T3z;
Chris@82 517 T3B = T3x + T3A;
Chris@82 518 T3C = FNMS(KP250000000, T3B, T3u);
Chris@82 519 T3D = T3x - T3A;
Chris@82 520 }
Chris@82 521 Rp[WS(rs, 5)] = T3u + T3B;
Chris@82 522 T3M = FNMS(KP559016994, T3D, T3C);
Chris@82 523 Rm[WS(rs, 6)] = FMA(KP951056516, T3N, T3M);
Chris@82 524 Rm[WS(rs, 2)] = FNMS(KP951056516, T3N, T3M);
Chris@82 525 T3E = FMA(KP559016994, T3D, T3C);
Chris@82 526 Rp[WS(rs, 1)] = FMA(KP951056516, T3L, T3E);
Chris@82 527 Rp[WS(rs, 9)] = FNMS(KP951056516, T3L, T3E);
Chris@82 528 }
/* Stores Ip[5], Ip[1], Ip[9], Im[6] and Im[2]. */
Chris@82 529 {
Chris@82 530 E T4Y, T50, T4P, T4S, T4T, T4U, T4Z, T4V;
Chris@82 531 {
Chris@82 532 E T4W, T4X, T4Q, T4R;
Chris@82 533 T4W = T3y - T3z;
Chris@82 534 T4X = T3v - T3w;
Chris@82 535 T4Y = FNMS(KP618033988, T4X, T4W);
Chris@82 536 T50 = FMA(KP618033988, T4W, T4X);
Chris@82 537 T4P = T4N - T4O;
Chris@82 538 T4Q = T3F + T3G;
Chris@82 539 T4R = T3I + T3J;
Chris@82 540 T4S = T4Q + T4R;
Chris@82 541 T4T = FNMS(KP250000000, T4S, T4P);
Chris@82 542 T4U = T4Q - T4R;
Chris@82 543 }
Chris@82 544 Ip[WS(rs, 5)] = T4S + T4P;
Chris@82 545 T4Z = FMA(KP559016994, T4U, T4T);
Chris@82 546 Ip[WS(rs, 1)] = FNMS(KP951056516, T50, T4Z);
Chris@82 547 Ip[WS(rs, 9)] = FMA(KP951056516, T50, T4Z);
Chris@82 548 T4V = FNMS(KP559016994, T4U, T4T);
Chris@82 549 Im[WS(rs, 6)] = FMS(KP951056516, T4Y, T4V);
Chris@82 550 Im[WS(rs, 2)] = -(FMA(KP951056516, T4Y, T4V));
Chris@82 551 }
Chris@82 552 }
Chris@82 553 }
Chris@82 554 }
Chris@82 555
/* Two-entry tw_instr table referenced by the hc2c_desc initializer in this
 * branch: a TW_FULL entry with arguments (1, 20) followed by a TW_NEXT entry
 * with arguments (1, 0).
 * NOTE(review): the meaning of the tw_instr fields is defined in the FFTW
 * headers, not in this file -- confirm there before changing the values. */
Chris@82 556 static const tw_instr twinstr[] = {
Chris@82 557 {TW_FULL, 1, 20},
Chris@82 558 {TW_NEXT, 1, 0}
Chris@82 559 };
Chris@82 560
/* Descriptor for the FMA build of hc2cf_20.  The {136, 38, 110, 0} group
 * repeats the counts from the generator comment at the top of this branch
 * (136 additions, 38 multiplications, 110 fused multiply/add). */
Chris@82 561 static const hc2c_desc desc = { 20, "hc2cf_20", twinstr, &GENUS, {136, 38, 110, 0} };
Chris@82 562
/* Registration entry point: passes hc2cf_20 and its descriptor to
 * X(khc2c_register) on planner p, tagged HC2C_VIA_RDFT. */
Chris@82 563 void X(codelet_hc2cf_20) (planner *p) {
Chris@82 564 X(khc2c_register) (p, hc2cf_20, &desc, HC2C_VIA_RDFT);
Chris@82 565 }
Chris@82 566 #else
Chris@82 567
Chris@82 568 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cf_20 -include rdft/scalar/hc2cf.h */
Chris@82 569
Chris@82 570 /*
Chris@82 571 * This function contains 246 FP additions, 124 FP multiplications,
Chris@82 572 * (or, 184 additions, 62 multiplications, 62 fused multiply/add),
Chris@82 573 * 85 stack variables, 4 constants, and 80 memory accesses
Chris@82 574 */
Chris@82 575 #include "rdft/scalar/hc2cf.h"
Chris@82 576
/*
 * hc2cf_20, non-FMA variant (generated by gen_hc2c with -n 20 -dit).
 *
 * For every m in [mb, me) the loop body reads ten elements (indices 0..9,
 * addressed through WS(rs, k)) from each of Rp, Ip, Rm and Im, scales all of
 * them except Rp[0] and Rm[0] by coefficient pairs taken from W, combines the
 * results, and stores ten values back into each of the four arrays.  All
 * loads of an iteration happen before its first store.
 *
 * Rp, Ip : advanced by +ms after each iteration.
 * Rm, Im : advanced by -ms after each iteration.
 * W      : coefficient table; 38 entries (19 pairs) are used per iteration
 *          and the pointer is pre-offset by (mb - 1) * 38 before the loop.
 *          The pair (Rp[k], Rm[k]), k = 1..9, uses W[4k-2] and W[4k-1];
 *          the pair (Ip[k], Im[k]), k = 0..9, uses W[4k] and W[4k+1].
 * rs     : stride passed to WS() to locate element k.
 * mb, me : half-open iteration range.
 * ms     : per-iteration pointer step.
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * defined in the included FFTW headers, which are not visible here.  The
 * comments below assume FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b --
 * confirm against the headers.
 */
Chris@82 577 static void hc2cf_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 578 {
/* Numerically: KP587785252 ~ sin(pi/5), KP951056516 ~ sin(2*pi/5),
 * KP250000000 = 1/4, KP559016994 ~ sqrt(5)/4. */
Chris@82 579 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 580 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 581 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 582 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 583 {
Chris@82 584 INT m;
/* One pass per m: Rp/Ip walk forwards, Rm/Im walk backwards, and W moves on
 * by the 38 coefficients used in the body. */
Chris@82 585 for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) {
Chris@82 586 E Tj, T1R, T4j, T4s, T2q, T37, T3Q, T42, T1r, T1O, T1P, T3p, T3s, T3K, T3A;
Chris@82 587 E T3B, T3Z, T1V, T1W, T1X, T23, T28, T4q, T2W, T2X, T4f, T33, T34, T35, T2G;
Chris@82 588 E T2L, T2M, TG, T13, T14, T3i, T3l, T3J, T3D, T3E, T40, T1S, T1T, T1U, T2e;
Chris@82 589 E T2j, T4p, T2T, T2U, T4e, T30, T31, T32, T2v, T2A, T2B;
/* Rp[0] and Rm[0] are used unscaled; (Rp[5], Rm[5]), (Ip[2], Im[2]) and
 * (Ip[7], Im[7]) are scaled by their W pairs, then summed and differenced. */
Chris@82 590 {
Chris@82 591 E T1, T3O, T6, T3N, Tc, T2n, Th, T2o;
Chris@82 592 T1 = Rp[0];
Chris@82 593 T3O = Rm[0];
Chris@82 594 {
Chris@82 595 E T3, T5, T2, T4;
Chris@82 596 T3 = Rp[WS(rs, 5)];
Chris@82 597 T5 = Rm[WS(rs, 5)];
Chris@82 598 T2 = W[18];
Chris@82 599 T4 = W[19];
Chris@82 600 T6 = FMA(T2, T3, T4 * T5);
Chris@82 601 T3N = FNMS(T4, T3, T2 * T5);
Chris@82 602 }
Chris@82 603 {
Chris@82 604 E T9, Tb, T8, Ta;
Chris@82 605 T9 = Ip[WS(rs, 2)];
Chris@82 606 Tb = Im[WS(rs, 2)];
Chris@82 607 T8 = W[8];
Chris@82 608 Ta = W[9];
Chris@82 609 Tc = FMA(T8, T9, Ta * Tb);
Chris@82 610 T2n = FNMS(Ta, T9, T8 * Tb);
Chris@82 611 }
Chris@82 612 {
Chris@82 613 E Te, Tg, Td, Tf;
Chris@82 614 Te = Ip[WS(rs, 7)];
Chris@82 615 Tg = Im[WS(rs, 7)];
Chris@82 616 Td = W[28];
Chris@82 617 Tf = W[29];
Chris@82 618 Th = FMA(Td, Te, Tf * Tg);
Chris@82 619 T2o = FNMS(Tf, Te, Td * Tg);
Chris@82 620 }
Chris@82 621 {
Chris@82 622 E T7, Ti, T4h, T4i;
Chris@82 623 T7 = T1 + T6;
Chris@82 624 Ti = Tc + Th;
Chris@82 625 Tj = T7 - Ti;
Chris@82 626 T1R = T7 + Ti;
Chris@82 627 T4h = T3O - T3N;
Chris@82 628 T4i = Tc - Th;
Chris@82 629 T4j = T4h - T4i;
Chris@82 630 T4s = T4i + T4h;
Chris@82 631 }
Chris@82 632 {
Chris@82 633 E T2m, T2p, T3M, T3P;
Chris@82 634 T2m = T1 - T6;
Chris@82 635 T2p = T2n - T2o;
Chris@82 636 T2q = T2m - T2p;
Chris@82 637 T37 = T2m + T2p;
Chris@82 638 T3M = T2n + T2o;
Chris@82 639 T3P = T3N + T3O;
Chris@82 640 T3Q = T3M + T3P;
Chris@82 641 T42 = T3P - T3M;
Chris@82 642 }
Chris@82 643 }
/* Scaled pairs at Rp/Rm[4], Rp/Rm[9], Ip/Im[8], Ip/Im[3], Ip/Im[6],
 * Ip/Im[1], Rp/Rm[6] and Rp/Rm[1], then the sums and differences that feed
 * the output stage. */
Chris@82 644 {
Chris@82 645 E T1f, T3n, T21, T2C, T1N, T3r, T27, T2K, T1q, T3o, T22, T2F, T1C, T3q, T26;
Chris@82 646 E T2H;
Chris@82 647 {
Chris@82 648 E T19, T1Z, T1e, T20;
Chris@82 649 {
Chris@82 650 E T16, T18, T15, T17;
Chris@82 651 T16 = Rp[WS(rs, 4)];
Chris@82 652 T18 = Rm[WS(rs, 4)];
Chris@82 653 T15 = W[14];
Chris@82 654 T17 = W[15];
Chris@82 655 T19 = FMA(T15, T16, T17 * T18);
Chris@82 656 T1Z = FNMS(T17, T16, T15 * T18);
Chris@82 657 }
Chris@82 658 {
Chris@82 659 E T1b, T1d, T1a, T1c;
Chris@82 660 T1b = Rp[WS(rs, 9)];
Chris@82 661 T1d = Rm[WS(rs, 9)];
Chris@82 662 T1a = W[34];
Chris@82 663 T1c = W[35];
Chris@82 664 T1e = FMA(T1a, T1b, T1c * T1d);
Chris@82 665 T20 = FNMS(T1c, T1b, T1a * T1d);
Chris@82 666 }
Chris@82 667 T1f = T19 + T1e;
Chris@82 668 T3n = T1Z + T20;
Chris@82 669 T21 = T1Z - T20;
Chris@82 670 T2C = T19 - T1e;
Chris@82 671 }
Chris@82 672 {
Chris@82 673 E T1H, T2I, T1M, T2J;
Chris@82 674 {
Chris@82 675 E T1E, T1G, T1D, T1F;
Chris@82 676 T1E = Ip[WS(rs, 8)];
Chris@82 677 T1G = Im[WS(rs, 8)];
Chris@82 678 T1D = W[32];
Chris@82 679 T1F = W[33];
Chris@82 680 T1H = FMA(T1D, T1E, T1F * T1G);
Chris@82 681 T2I = FNMS(T1F, T1E, T1D * T1G);
Chris@82 682 }
Chris@82 683 {
Chris@82 684 E T1J, T1L, T1I, T1K;
Chris@82 685 T1J = Ip[WS(rs, 3)];
Chris@82 686 T1L = Im[WS(rs, 3)];
Chris@82 687 T1I = W[12];
Chris@82 688 T1K = W[13];
Chris@82 689 T1M = FMA(T1I, T1J, T1K * T1L);
Chris@82 690 T2J = FNMS(T1K, T1J, T1I * T1L);
Chris@82 691 }
Chris@82 692 T1N = T1H + T1M;
Chris@82 693 T3r = T2I + T2J;
Chris@82 694 T27 = T1H - T1M;
Chris@82 695 T2K = T2I - T2J;
Chris@82 696 }
Chris@82 697 {
Chris@82 698 E T1k, T2D, T1p, T2E;
Chris@82 699 {
Chris@82 700 E T1h, T1j, T1g, T1i;
Chris@82 701 T1h = Ip[WS(rs, 6)];
Chris@82 702 T1j = Im[WS(rs, 6)];
Chris@82 703 T1g = W[24];
Chris@82 704 T1i = W[25];
Chris@82 705 T1k = FMA(T1g, T1h, T1i * T1j);
Chris@82 706 T2D = FNMS(T1i, T1h, T1g * T1j);
Chris@82 707 }
Chris@82 708 {
Chris@82 709 E T1m, T1o, T1l, T1n;
Chris@82 710 T1m = Ip[WS(rs, 1)];
Chris@82 711 T1o = Im[WS(rs, 1)];
Chris@82 712 T1l = W[4];
Chris@82 713 T1n = W[5];
Chris@82 714 T1p = FMA(T1l, T1m, T1n * T1o);
Chris@82 715 T2E = FNMS(T1n, T1m, T1l * T1o);
Chris@82 716 }
Chris@82 717 T1q = T1k + T1p;
Chris@82 718 T3o = T2D + T2E;
Chris@82 719 T22 = T1k - T1p;
Chris@82 720 T2F = T2D - T2E;
Chris@82 721 }
Chris@82 722 {
Chris@82 723 E T1w, T24, T1B, T25;
Chris@82 724 {
Chris@82 725 E T1t, T1v, T1s, T1u;
Chris@82 726 T1t = Rp[WS(rs, 6)];
Chris@82 727 T1v = Rm[WS(rs, 6)];
Chris@82 728 T1s = W[22];
Chris@82 729 T1u = W[23];
Chris@82 730 T1w = FMA(T1s, T1t, T1u * T1v);
Chris@82 731 T24 = FNMS(T1u, T1t, T1s * T1v);
Chris@82 732 }
Chris@82 733 {
Chris@82 734 E T1y, T1A, T1x, T1z;
Chris@82 735 T1y = Rp[WS(rs, 1)];
Chris@82 736 T1A = Rm[WS(rs, 1)];
Chris@82 737 T1x = W[2];
Chris@82 738 T1z = W[3];
Chris@82 739 T1B = FMA(T1x, T1y, T1z * T1A);
Chris@82 740 T25 = FNMS(T1z, T1y, T1x * T1A);
Chris@82 741 }
Chris@82 742 T1C = T1w + T1B;
Chris@82 743 T3q = T24 + T25;
Chris@82 744 T26 = T24 - T25;
Chris@82 745 T2H = T1w - T1B;
Chris@82 746 }
Chris@82 747 T1r = T1f - T1q;
Chris@82 748 T1O = T1C - T1N;
Chris@82 749 T1P = T1r + T1O;
Chris@82 750 T3p = T3n + T3o;
Chris@82 751 T3s = T3q + T3r;
Chris@82 752 T3K = T3p + T3s;
Chris@82 753 T3A = T3n - T3o;
Chris@82 754 T3B = T3r - T3q;
Chris@82 755 T3Z = T3B - T3A;
Chris@82 756 T1V = T1f + T1q;
Chris@82 757 T1W = T1C + T1N;
Chris@82 758 T1X = T1V + T1W;
Chris@82 759 T23 = T21 + T22;
Chris@82 760 T28 = T26 + T27;
Chris@82 761 T4q = T23 + T28;
Chris@82 762 T2W = T21 - T22;
Chris@82 763 T2X = T26 - T27;
Chris@82 764 T4f = T2W + T2X;
Chris@82 765 T33 = T2C + T2F;
Chris@82 766 T34 = T2H + T2K;
Chris@82 767 T35 = T33 + T34;
Chris@82 768 T2G = T2C - T2F;
Chris@82 769 T2L = T2H - T2K;
Chris@82 770 T2M = T2G + T2L;
Chris@82 771 }
/* Same pattern for Rp/Rm[2], Rp/Rm[7], Ip/Im[0], Ip/Im[5], Ip/Im[4],
 * Ip/Im[9], Rp/Rm[8] and Rp/Rm[3]. */
Chris@82 772 {
Chris@82 773 E Tu, T3g, T2c, T2r, T12, T3k, T2f, T2z, TF, T3h, T2d, T2u, TR, T3j, T2i;
Chris@82 774 E T2w;
Chris@82 775 {
Chris@82 776 E To, T2a, Tt, T2b;
Chris@82 777 {
Chris@82 778 E Tl, Tn, Tk, Tm;
Chris@82 779 Tl = Rp[WS(rs, 2)];
Chris@82 780 Tn = Rm[WS(rs, 2)];
Chris@82 781 Tk = W[6];
Chris@82 782 Tm = W[7];
Chris@82 783 To = FMA(Tk, Tl, Tm * Tn);
Chris@82 784 T2a = FNMS(Tm, Tl, Tk * Tn);
Chris@82 785 }
Chris@82 786 {
Chris@82 787 E Tq, Ts, Tp, Tr;
Chris@82 788 Tq = Rp[WS(rs, 7)];
Chris@82 789 Ts = Rm[WS(rs, 7)];
Chris@82 790 Tp = W[26];
Chris@82 791 Tr = W[27];
Chris@82 792 Tt = FMA(Tp, Tq, Tr * Ts);
Chris@82 793 T2b = FNMS(Tr, Tq, Tp * Ts);
Chris@82 794 }
Chris@82 795 Tu = To + Tt;
Chris@82 796 T3g = T2a + T2b;
Chris@82 797 T2c = T2a - T2b;
Chris@82 798 T2r = To - Tt;
Chris@82 799 }
Chris@82 800 {
Chris@82 801 E TW, T2x, T11, T2y;
Chris@82 802 {
Chris@82 803 E TT, TV, TS, TU;
Chris@82 804 TT = Ip[0];
Chris@82 805 TV = Im[0];
Chris@82 806 TS = W[0];
Chris@82 807 TU = W[1];
Chris@82 808 TW = FMA(TS, TT, TU * TV);
Chris@82 809 T2x = FNMS(TU, TT, TS * TV);
Chris@82 810 }
Chris@82 811 {
Chris@82 812 E TY, T10, TX, TZ;
Chris@82 813 TY = Ip[WS(rs, 5)];
Chris@82 814 T10 = Im[WS(rs, 5)];
Chris@82 815 TX = W[20];
Chris@82 816 TZ = W[21];
Chris@82 817 T11 = FMA(TX, TY, TZ * T10);
Chris@82 818 T2y = FNMS(TZ, TY, TX * T10);
Chris@82 819 }
Chris@82 820 T12 = TW + T11;
Chris@82 821 T3k = T2x + T2y;
Chris@82 822 T2f = T11 - TW;
Chris@82 823 T2z = T2x - T2y;
Chris@82 824 }
Chris@82 825 {
Chris@82 826 E Tz, T2s, TE, T2t;
Chris@82 827 {
Chris@82 828 E Tw, Ty, Tv, Tx;
Chris@82 829 Tw = Ip[WS(rs, 4)];
Chris@82 830 Ty = Im[WS(rs, 4)];
Chris@82 831 Tv = W[16];
Chris@82 832 Tx = W[17];
Chris@82 833 Tz = FMA(Tv, Tw, Tx * Ty);
Chris@82 834 T2s = FNMS(Tx, Tw, Tv * Ty);
Chris@82 835 }
Chris@82 836 {
Chris@82 837 E TB, TD, TA, TC;
Chris@82 838 TB = Ip[WS(rs, 9)];
Chris@82 839 TD = Im[WS(rs, 9)];
Chris@82 840 TA = W[36];
Chris@82 841 TC = W[37];
Chris@82 842 TE = FMA(TA, TB, TC * TD);
Chris@82 843 T2t = FNMS(TC, TB, TA * TD);
Chris@82 844 }
Chris@82 845 TF = Tz + TE;
Chris@82 846 T3h = T2s + T2t;
Chris@82 847 T2d = Tz - TE;
Chris@82 848 T2u = T2s - T2t;
Chris@82 849 }
Chris@82 850 {
Chris@82 851 E TL, T2g, TQ, T2h;
Chris@82 852 {
Chris@82 853 E TI, TK, TH, TJ;
Chris@82 854 TI = Rp[WS(rs, 8)];
Chris@82 855 TK = Rm[WS(rs, 8)];
Chris@82 856 TH = W[30];
Chris@82 857 TJ = W[31];
Chris@82 858 TL = FMA(TH, TI, TJ * TK);
Chris@82 859 T2g = FNMS(TJ, TI, TH * TK);
Chris@82 860 }
Chris@82 861 {
Chris@82 862 E TN, TP, TM, TO;
Chris@82 863 TN = Rp[WS(rs, 3)];
Chris@82 864 TP = Rm[WS(rs, 3)];
Chris@82 865 TM = W[10];
Chris@82 866 TO = W[11];
Chris@82 867 TQ = FMA(TM, TN, TO * TP);
Chris@82 868 T2h = FNMS(TO, TN, TM * TP);
Chris@82 869 }
Chris@82 870 TR = TL + TQ;
Chris@82 871 T3j = T2g + T2h;
Chris@82 872 T2i = T2g - T2h;
Chris@82 873 T2w = TL - TQ;
Chris@82 874 }
Chris@82 875 TG = Tu - TF;
Chris@82 876 T13 = TR - T12;
Chris@82 877 T14 = TG + T13;
Chris@82 878 T3i = T3g + T3h;
Chris@82 879 T3l = T3j + T3k;
Chris@82 880 T3J = T3i + T3l;
Chris@82 881 T3D = T3g - T3h;
Chris@82 882 T3E = T3j - T3k;
Chris@82 883 T40 = T3D + T3E;
Chris@82 884 T1S = Tu + TF;
Chris@82 885 T1T = TR + T12;
Chris@82 886 T1U = T1S + T1T;
Chris@82 887 T2e = T2c + T2d;
Chris@82 888 T2j = T2f - T2i;
Chris@82 889 T4p = T2j - T2e;
Chris@82 890 T2T = T2c - T2d;
Chris@82 891 T2U = T2i + T2f;
Chris@82 892 T4e = T2T + T2U;
Chris@82 893 T30 = T2r + T2u;
Chris@82 894 T31 = T2w + T2z;
Chris@82 895 T32 = T30 + T31;
Chris@82 896 T2v = T2r - T2u;
Chris@82 897 T2A = T2w - T2z;
Chris@82 898 T2B = T2v + T2A;
Chris@82 899 }
/* Output stage: each of the eight blocks below combines the intermediate
 * sums and differences with the KP constants and stores five results.
 * This block stores Rm[9], Rm[5], Rp[6], Rp[2] and Rm[1]. */
Chris@82 900 {
Chris@82 901 E T3y, T1Q, T3x, T3G, T3I, T3C, T3F, T3H, T3z;
Chris@82 902 T3y = KP559016994 * (T14 - T1P);
Chris@82 903 T1Q = T14 + T1P;
Chris@82 904 T3x = FNMS(KP250000000, T1Q, Tj);
Chris@82 905 T3C = T3A + T3B;
Chris@82 906 T3F = T3D - T3E;
Chris@82 907 T3G = FNMS(KP587785252, T3F, KP951056516 * T3C);
Chris@82 908 T3I = FMA(KP951056516, T3F, KP587785252 * T3C);
Chris@82 909 Rm[WS(rs, 9)] = Tj + T1Q;
Chris@82 910 T3H = T3y + T3x;
Chris@82 911 Rm[WS(rs, 5)] = T3H - T3I;
Chris@82 912 Rp[WS(rs, 6)] = T3H + T3I;
Chris@82 913 T3z = T3x - T3y;
Chris@82 914 Rp[WS(rs, 2)] = T3z - T3G;
Chris@82 915 Rm[WS(rs, 1)] = T3z + T3G;
Chris@82 916 }
/* Stores Im[9], Im[5], Ip[6], Im[1] and Ip[2]. */
Chris@82 917 {
Chris@82 918 E T47, T41, T46, T45, T49, T43, T44, T4a, T48;
Chris@82 919 T47 = KP559016994 * (T40 + T3Z);
Chris@82 920 T41 = T3Z - T40;
Chris@82 921 T46 = FMA(KP250000000, T41, T42);
Chris@82 922 T43 = T13 - TG;
Chris@82 923 T44 = T1r - T1O;
Chris@82 924 T45 = FMA(KP587785252, T43, KP951056516 * T44);
Chris@82 925 T49 = FNMS(KP587785252, T44, KP951056516 * T43);
Chris@82 926 Im[WS(rs, 9)] = T41 - T42;
Chris@82 927 T4a = T47 + T46;
Chris@82 928 Im[WS(rs, 5)] = T49 - T4a;
Chris@82 929 Ip[WS(rs, 6)] = T49 + T4a;
Chris@82 930 T48 = T46 - T47;
Chris@82 931 Im[WS(rs, 1)] = T45 - T48;
Chris@82 932 Ip[WS(rs, 2)] = T45 + T48;
Chris@82 933 }
/* Stores Rp[0], Rm[7], Rp[8], Rp[4] and Rm[3]. */
Chris@82 934 {
Chris@82 935 E T3d, T1Y, T3e, T3u, T3w, T3m, T3t, T3v, T3f;
Chris@82 936 T3d = KP559016994 * (T1U - T1X);
Chris@82 937 T1Y = T1U + T1X;
Chris@82 938 T3e = FNMS(KP250000000, T1Y, T1R);
Chris@82 939 T3m = T3i - T3l;
Chris@82 940 T3t = T3p - T3s;
Chris@82 941 T3u = FMA(KP951056516, T3m, KP587785252 * T3t);
Chris@82 942 T3w = FNMS(KP587785252, T3m, KP951056516 * T3t);
Chris@82 943 Rp[0] = T1R + T1Y;
Chris@82 944 T3v = T3e - T3d;
Chris@82 945 Rm[WS(rs, 7)] = T3v - T3w;
Chris@82 946 Rp[WS(rs, 8)] = T3v + T3w;
Chris@82 947 T3f = T3d + T3e;
Chris@82 948 Rp[WS(rs, 4)] = T3f - T3u;
Chris@82 949 Rm[WS(rs, 3)] = T3f + T3u;
Chris@82 950 }
/* Stores Ip[0], Im[7], Ip[8], Im[3] and Ip[4]. */
Chris@82 951 {
Chris@82 952 E T3U, T3L, T3V, T3T, T3X, T3R, T3S, T3Y, T3W;
Chris@82 953 T3U = KP559016994 * (T3J - T3K);
Chris@82 954 T3L = T3J + T3K;
Chris@82 955 T3V = FNMS(KP250000000, T3L, T3Q);
Chris@82 956 T3R = T1S - T1T;
Chris@82 957 T3S = T1V - T1W;
Chris@82 958 T3T = FMA(KP951056516, T3R, KP587785252 * T3S);
Chris@82 959 T3X = FNMS(KP951056516, T3S, KP587785252 * T3R);
Chris@82 960 Ip[0] = T3L + T3Q;
Chris@82 961 T3Y = T3V - T3U;
Chris@82 962 Im[WS(rs, 7)] = T3X - T3Y;
Chris@82 963 Ip[WS(rs, 8)] = T3X + T3Y;
Chris@82 964 T3W = T3U + T3V;
Chris@82 965 Im[WS(rs, 3)] = T3T - T3W;
Chris@82 966 Ip[WS(rs, 4)] = T3T + T3W;
Chris@82 967 }
/* Stores Rm[4], Rm[8], Rm[0], Rp[3] and Rp[7]. */
Chris@82 968 {
Chris@82 969 E T2P, T2N, T2O, T2l, T2R, T29, T2k, T2S, T2Q;
Chris@82 970 T2P = KP559016994 * (T2B - T2M);
Chris@82 971 T2N = T2B + T2M;
Chris@82 972 T2O = FNMS(KP250000000, T2N, T2q);
Chris@82 973 T29 = T23 - T28;
Chris@82 974 T2k = T2e + T2j;
Chris@82 975 T2l = FNMS(KP587785252, T2k, KP951056516 * T29);
Chris@82 976 T2R = FMA(KP951056516, T2k, KP587785252 * T29);
Chris@82 977 Rm[WS(rs, 4)] = T2q + T2N;
Chris@82 978 T2S = T2P + T2O;
Chris@82 979 Rm[WS(rs, 8)] = T2R + T2S;
Chris@82 980 Rm[0] = T2S - T2R;
Chris@82 981 T2Q = T2O - T2P;
Chris@82 982 Rp[WS(rs, 3)] = T2l + T2Q;
Chris@82 983 Rp[WS(rs, 7)] = T2Q - T2l;
Chris@82 984 }
/* Stores Im[4], Ip[3], Ip[7], Im[8] and Im[0]. */
Chris@82 985 {
Chris@82 986 E T4w, T4r, T4x, T4v, T4A, T4t, T4u, T4z, T4y;
Chris@82 987 T4w = KP559016994 * (T4p + T4q);
Chris@82 988 T4r = T4p - T4q;
Chris@82 989 T4x = FMA(KP250000000, T4r, T4s);
Chris@82 990 T4t = T2v - T2A;
Chris@82 991 T4u = T2G - T2L;
Chris@82 992 T4v = FMA(KP951056516, T4t, KP587785252 * T4u);
Chris@82 993 T4A = FNMS(KP587785252, T4t, KP951056516 * T4u);
Chris@82 994 Im[WS(rs, 4)] = T4r - T4s;
Chris@82 995 T4z = T4w + T4x;
Chris@82 996 Ip[WS(rs, 3)] = T4z - T4A;
Chris@82 997 Ip[WS(rs, 7)] = T4A + T4z;
Chris@82 998 T4y = T4w - T4x;
Chris@82 999 Im[WS(rs, 8)] = T4v + T4y;
Chris@82 1000 Im[0] = T4y - T4v;
Chris@82 1001 }
/* Stores Rp[5], Rm[6], Rm[2], Rp[1] and Rp[9]. */
Chris@82 1002 {
Chris@82 1003 E T36, T38, T39, T2Z, T3b, T2V, T2Y, T3c, T3a;
Chris@82 1004 T36 = KP559016994 * (T32 - T35);
Chris@82 1005 T38 = T32 + T35;
Chris@82 1006 T39 = FNMS(KP250000000, T38, T37);
Chris@82 1007 T2V = T2T - T2U;
Chris@82 1008 T2Y = T2W - T2X;
Chris@82 1009 T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y);
Chris@82 1010 T3b = FNMS(KP587785252, T2V, KP951056516 * T2Y);
Chris@82 1011 Rp[WS(rs, 5)] = T37 + T38;
Chris@82 1012 T3c = T39 - T36;
Chris@82 1013 Rm[WS(rs, 6)] = T3b + T3c;
Chris@82 1014 Rm[WS(rs, 2)] = T3c - T3b;
Chris@82 1015 T3a = T36 + T39;
Chris@82 1016 Rp[WS(rs, 1)] = T2Z + T3a;
Chris@82 1017 Rp[WS(rs, 9)] = T3a - T2Z;
Chris@82 1018 }
/* Stores Ip[5], Ip[1], Ip[9], Im[6] and Im[2]. */
Chris@82 1019 {
Chris@82 1020 E T4g, T4k, T4l, T4d, T4o, T4b, T4c, T4n, T4m;
Chris@82 1021 T4g = KP559016994 * (T4e - T4f);
Chris@82 1022 T4k = T4e + T4f;
Chris@82 1023 T4l = FNMS(KP250000000, T4k, T4j);
Chris@82 1024 T4b = T33 - T34;
Chris@82 1025 T4c = T30 - T31;
Chris@82 1026 T4d = FNMS(KP587785252, T4c, KP951056516 * T4b);
Chris@82 1027 T4o = FMA(KP951056516, T4c, KP587785252 * T4b);
Chris@82 1028 Ip[WS(rs, 5)] = T4k + T4j;
Chris@82 1029 T4n = T4g + T4l;
Chris@82 1030 Ip[WS(rs, 1)] = T4n - T4o;
Chris@82 1031 Ip[WS(rs, 9)] = T4o + T4n;
Chris@82 1032 T4m = T4g - T4l;
Chris@82 1033 Im[WS(rs, 6)] = T4d + T4m;
Chris@82 1034 Im[WS(rs, 2)] = T4m - T4d;
Chris@82 1035 }
Chris@82 1036 }
Chris@82 1037 }
Chris@82 1038 }
Chris@82 1039
/* Two-entry tw_instr table referenced by the hc2c_desc initializer in this
 * branch: a TW_FULL entry with arguments (1, 20) followed by a TW_NEXT entry
 * with arguments (1, 0).
 * NOTE(review): the meaning of the tw_instr fields is defined in the FFTW
 * headers, not in this file -- confirm there before changing the values. */
Chris@82 1040 static const tw_instr twinstr[] = {
Chris@82 1041 {TW_FULL, 1, 20},
Chris@82 1042 {TW_NEXT, 1, 0}
Chris@82 1043 };
Chris@82 1044
/* Descriptor for the non-FMA build of hc2cf_20.  The {184, 62, 62, 0} group
 * repeats the counts from the generator comment at the top of this branch
 * (184 additions, 62 multiplications, 62 fused multiply/add). */
Chris@82 1045 static const hc2c_desc desc = { 20, "hc2cf_20", twinstr, &GENUS, {184, 62, 62, 0} };
Chris@82 1046
/* Registration entry point: passes hc2cf_20 and its descriptor to
 * X(khc2c_register) on planner p, tagged HC2C_VIA_RDFT. */
Chris@82 1047 void X(codelet_hc2cf_20) (planner *p) {
Chris@82 1048 X(khc2c_register) (p, hc2cf_20, &desc, HC2C_VIA_RDFT);
Chris@82 1049 }
Chris@82 1050 #endif