annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cb2_20.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:57 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 20 -dif -name hc2cb2_20 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 276 FP additions, 198 FP multiplications,
Chris@82 32 * (or, 136 additions, 58 multiplications, 140 fused multiply/add),
Chris@82 33 * 129 stack variables, 4 constants, and 80 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cb2_20: genfft-generated codelet, FMA variant (the "Generated by" line
 * above lists the exact flags: -n 20 -dif -sign 1 -twiddle-log3).
 *
 * Rp, Ip, Rm, Im: data arrays, read and then overwritten in place at index 0
 *                 and at WS(rs, 1) .. WS(rs, 9) (10 loads and 10 stores per
 *                 array, i.e. the 80 memory accesses quoted above).
 * W:              twiddle table; eight values W[0..7] are read per iteration.
 *                 The pointer is first advanced by (mb - 1) * 8 and then by 8
 *                 after every iteration.
 * rs:             stride handed to WS() for element addressing.
 * mb, me:         the loop runs for m = mb .. me - 1.
 * ms:             per-iteration step: Rp and Ip advance by +ms, Rm and Im
 *                 by -ms.
 *
 * Within one iteration every input is loaded before the first output is
 * stored.
 */
Chris@82 37 static void hc2cb2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) {
Chris@82 46 E TD, TH, TE, T1L, T1N, T1X, TG, T29, TI, T2b, T1V, T1O, T24, T36, T5b;
Chris@82 47 E T1S, T1Y, T3b, T3e, T2o, T2Y, T2U, T31, T2s, T4y, T4u, T2f, T2c, T2g, T5g;
Chris@82 48 E T2k, T1s, T48, T4c, T5q, T5m, T4k, T4f;
Chris@82 49 {
Chris@82 50 E T1r, T1M, T2T, T1R, T2X, T23, T2r, T1W, T2n, T2a, TF, T4x;
/* Read the eight twiddle values W[0..7] and precompute the products of
 * them that serve as output multipliers further down. */
Chris@82 51 TD = W[0];
Chris@82 52 TH = W[3];
Chris@82 53 TE = W[2];
Chris@82 54 TF = TD * TE;
Chris@82 55 T1r = TD * TH;
Chris@82 56 T1L = W[6];
Chris@82 57 T1M = TD * T1L;
Chris@82 58 T2T = TE * T1L;
Chris@82 59 T1N = W[7];
Chris@82 60 T1R = TD * T1N;
Chris@82 61 T2X = TE * T1N;
Chris@82 62 T1X = W[5];
Chris@82 63 T23 = TE * T1X;
Chris@82 64 T2r = TD * T1X;
Chris@82 65 TG = W[1];
Chris@82 66 T29 = FNMS(TG, TH, TF);
Chris@82 67 TI = FMA(TG, TH, TF);
Chris@82 68 T2b = FMA(TG, TE, T1r);
Chris@82 69 T1V = W[4];
Chris@82 70 T1W = TE * T1V;
Chris@82 71 T2n = TD * T1V;
Chris@82 72 T2a = T29 * T1V;
Chris@82 73 T1O = FMA(TG, T1N, T1M);
Chris@82 74 T24 = FNMS(TH, T1V, T23);
Chris@82 75 T36 = FNMS(TG, T1V, T2r);
Chris@82 76 T5b = FNMS(T2b, T1X, T2a);
Chris@82 77 T1S = FNMS(TG, T1L, T1R);
Chris@82 78 T1Y = FMA(TH, T1X, T1W);
Chris@82 79 T3b = FNMS(TH, T1X, T1W);
Chris@82 80 T3e = FMA(TH, T1V, T23);
Chris@82 81 T2o = FNMS(TG, T1X, T2n);
Chris@82 82 T2Y = FNMS(TH, T1L, T2X);
Chris@82 83 T2U = FMA(TH, T1N, T2T);
Chris@82 84 T31 = FMA(TG, T1X, T2n);
Chris@82 85 T2s = FMA(TG, T1V, T2r);
Chris@82 86 T4x = T29 * T1N;
Chris@82 87 T4y = FNMS(T2b, T1L, T4x);
Chris@82 88 {
Chris@82 89 E T4t, T2e, T2d, T2j;
Chris@82 90 T4t = T29 * T1L;
Chris@82 91 T4u = FMA(T2b, T1N, T4t);
Chris@82 92 T2e = T29 * T1X;
Chris@82 93 T2f = FNMS(T2b, T1V, T2e);
Chris@82 94 T2c = FMA(T2b, T1X, T2a);
Chris@82 95 T2d = T2c * T1L;
Chris@82 96 T2j = T2c * T1N;
Chris@82 97 T2g = FMA(T2f, T1N, T2d);
Chris@82 98 T5g = FMA(T2b, T1V, T2e);
Chris@82 99 T2k = FNMS(T2f, T1L, T2j);
Chris@82 100 {
Chris@82 101 E T47, T5p, T4b, T5l;
Chris@82 102 T47 = TI * T1V;
Chris@82 103 T5p = TI * T1N;
Chris@82 104 T4b = TI * T1X;
Chris@82 105 T5l = TI * T1L;
Chris@82 106 T1s = FNMS(TG, TE, T1r);
Chris@82 107 T48 = FMA(T1s, T1X, T47);
Chris@82 108 T4c = FNMS(T1s, T1V, T4b);
Chris@82 109 T5q = FNMS(T1s, T1L, T5p);
Chris@82 110 T5m = FMA(T1s, T1N, T5l);
Chris@82 111 T4k = FMA(T1s, T1V, T4b);
Chris@82 112 T4f = FNMS(T1s, T1X, T47);
Chris@82 113 }
Chris@82 114 }
Chris@82 115 }
Chris@82 116 {
Chris@82 117 E T7, T4B, T4V, TJ, T1z, T3j, T3V, T2H, T18, T42, T43, T1n, T2D, T53, T52;
Chris@82 118 E T2A, T1H, T4R, T4O, T1G, T2O, T3I, T2P, T3P, T2I, T2J, T2K, T1A, T1B, T1C;
Chris@82 119 E TC, T2w, T3Y, T40, T4I, T4K, TQ, TS, T3y, T3A, T4Y, T50;
/* Load all inputs from Rp/Rm/Ip/Im and combine them into sums and
 * differences. */
Chris@82 120 {
Chris@82 121 E T3, T3h, T1v, T3T, T6, T3U, T1y, T3i;
Chris@82 122 {
Chris@82 123 E T1, T2, T1t, T1u;
Chris@82 124 T1 = Rp[0];
Chris@82 125 T2 = Rm[WS(rs, 9)];
Chris@82 126 T3 = T1 + T2;
Chris@82 127 T3h = T1 - T2;
Chris@82 128 T1t = Ip[0];
Chris@82 129 T1u = Im[WS(rs, 9)];
Chris@82 130 T1v = T1t - T1u;
Chris@82 131 T3T = T1t + T1u;
Chris@82 132 }
Chris@82 133 {
Chris@82 134 E T4, T5, T1w, T1x;
Chris@82 135 T4 = Rp[WS(rs, 5)];
Chris@82 136 T5 = Rm[WS(rs, 4)];
Chris@82 137 T6 = T4 + T5;
Chris@82 138 T3U = T4 - T5;
Chris@82 139 T1w = Ip[WS(rs, 5)];
Chris@82 140 T1x = Im[WS(rs, 4)];
Chris@82 141 T1y = T1w - T1x;
Chris@82 142 T3i = T1w + T1x;
Chris@82 143 }
Chris@82 144 T7 = T3 + T6;
Chris@82 145 T4B = T3h - T3i;
Chris@82 146 T4V = T3U + T3T;
Chris@82 147 TJ = T3 - T6;
Chris@82 148 T1z = T1v - T1y;
Chris@82 149 T3j = T3h + T3i;
Chris@82 150 T3V = T3T - T3U;
Chris@82 151 T2H = T1v + T1y;
Chris@82 152 }
Chris@82 153 {
Chris@82 154 E Te, T4C, T4M, TK, T1f, T3m, T3L, T2y, TA, T4G, T4Q, TO, T17, T3w, T3H;
Chris@82 155 E T2C, Tl, T4D, T4N, TL, T1m, T3p, T3O, T2z, Tt, T4F, T4P, TN, T10, T3t;
Chris@82 156 E T3E, T2B;
Chris@82 157 {
Chris@82 158 E Ta, T3k, T1b, T3J, Td, T3K, T1e, T3l;
Chris@82 159 {
Chris@82 160 E T8, T9, T19, T1a;
Chris@82 161 T8 = Rp[WS(rs, 4)];
Chris@82 162 T9 = Rm[WS(rs, 5)];
Chris@82 163 Ta = T8 + T9;
Chris@82 164 T3k = T8 - T9;
Chris@82 165 T19 = Ip[WS(rs, 4)];
Chris@82 166 T1a = Im[WS(rs, 5)];
Chris@82 167 T1b = T19 - T1a;
Chris@82 168 T3J = T19 + T1a;
Chris@82 169 }
Chris@82 170 {
Chris@82 171 E Tb, Tc, T1c, T1d;
Chris@82 172 Tb = Rp[WS(rs, 9)];
Chris@82 173 Tc = Rm[0];
Chris@82 174 Td = Tb + Tc;
Chris@82 175 T3K = Tb - Tc;
Chris@82 176 T1c = Ip[WS(rs, 9)];
Chris@82 177 T1d = Im[0];
Chris@82 178 T1e = T1c - T1d;
Chris@82 179 T3l = T1c + T1d;
Chris@82 180 }
Chris@82 181 Te = Ta + Td;
Chris@82 182 T4C = T3k - T3l;
Chris@82 183 T4M = T3K + T3J;
Chris@82 184 TK = Ta - Td;
Chris@82 185 T1f = T1b - T1e;
Chris@82 186 T3m = T3k + T3l;
Chris@82 187 T3L = T3J - T3K;
Chris@82 188 T2y = T1b + T1e;
Chris@82 189 }
Chris@82 190 {
Chris@82 191 E Tw, T3u, T13, T3G, Tz, T3F, T16, T3v;
Chris@82 192 {
Chris@82 193 E Tu, Tv, T11, T12;
Chris@82 194 Tu = Rm[WS(rs, 7)];
Chris@82 195 Tv = Rp[WS(rs, 2)];
Chris@82 196 Tw = Tu + Tv;
Chris@82 197 T3u = Tu - Tv;
Chris@82 198 T11 = Ip[WS(rs, 2)];
Chris@82 199 T12 = Im[WS(rs, 7)];
Chris@82 200 T13 = T11 - T12;
Chris@82 201 T3G = T11 + T12;
Chris@82 202 }
Chris@82 203 {
Chris@82 204 E Tx, Ty, T14, T15;
Chris@82 205 Tx = Rm[WS(rs, 2)];
Chris@82 206 Ty = Rp[WS(rs, 7)];
Chris@82 207 Tz = Tx + Ty;
Chris@82 208 T3F = Tx - Ty;
Chris@82 209 T14 = Ip[WS(rs, 7)];
Chris@82 210 T15 = Im[WS(rs, 2)];
Chris@82 211 T16 = T14 - T15;
Chris@82 212 T3v = T14 + T15;
Chris@82 213 }
Chris@82 214 TA = Tw + Tz;
Chris@82 215 T4G = T3u + T3v;
Chris@82 216 T4Q = T3F - T3G;
Chris@82 217 TO = Tw - Tz;
Chris@82 218 T17 = T13 - T16;
Chris@82 219 T3w = T3u - T3v;
Chris@82 220 T3H = T3F + T3G;
Chris@82 221 T2C = T13 + T16;
Chris@82 222 }
Chris@82 223 {
Chris@82 224 E Th, T3n, T1i, T3N, Tk, T3M, T1l, T3o;
Chris@82 225 {
Chris@82 226 E Tf, Tg, T1g, T1h;
Chris@82 227 Tf = Rm[WS(rs, 3)];
Chris@82 228 Tg = Rp[WS(rs, 6)];
Chris@82 229 Th = Tf + Tg;
Chris@82 230 T3n = Tf - Tg;
Chris@82 231 T1g = Ip[WS(rs, 6)];
Chris@82 232 T1h = Im[WS(rs, 3)];
Chris@82 233 T1i = T1g - T1h;
Chris@82 234 T3N = T1g + T1h;
Chris@82 235 }
Chris@82 236 {
Chris@82 237 E Ti, Tj, T1j, T1k;
Chris@82 238 Ti = Rp[WS(rs, 1)];
Chris@82 239 Tj = Rm[WS(rs, 8)];
Chris@82 240 Tk = Ti + Tj;
Chris@82 241 T3M = Ti - Tj;
Chris@82 242 T1j = Ip[WS(rs, 1)];
Chris@82 243 T1k = Im[WS(rs, 8)];
Chris@82 244 T1l = T1j - T1k;
Chris@82 245 T3o = T1j + T1k;
Chris@82 246 }
Chris@82 247 Tl = Th + Tk;
Chris@82 248 T4D = T3n - T3o;
Chris@82 249 T4N = T3M - T3N;
Chris@82 250 TL = Th - Tk;
Chris@82 251 T1m = T1i - T1l;
Chris@82 252 T3p = T3n + T3o;
Chris@82 253 T3O = T3M + T3N;
Chris@82 254 T2z = T1i + T1l;
Chris@82 255 }
Chris@82 256 {
Chris@82 257 E Tp, T3r, TW, T3C, Ts, T3D, TZ, T3s;
Chris@82 258 {
Chris@82 259 E Tn, To, TU, TV;
Chris@82 260 Tn = Rp[WS(rs, 8)];
Chris@82 261 To = Rm[WS(rs, 1)];
Chris@82 262 Tp = Tn + To;
Chris@82 263 T3r = Tn - To;
Chris@82 264 TU = Ip[WS(rs, 8)];
Chris@82 265 TV = Im[WS(rs, 1)];
Chris@82 266 TW = TU - TV;
Chris@82 267 T3C = TU + TV;
Chris@82 268 }
Chris@82 269 {
Chris@82 270 E Tq, Tr, TX, TY;
Chris@82 271 Tq = Rm[WS(rs, 6)];
Chris@82 272 Tr = Rp[WS(rs, 3)];
Chris@82 273 Ts = Tq + Tr;
Chris@82 274 T3D = Tq - Tr;
Chris@82 275 TX = Ip[WS(rs, 3)];
Chris@82 276 TY = Im[WS(rs, 6)];
Chris@82 277 TZ = TX - TY;
Chris@82 278 T3s = TX + TY;
Chris@82 279 }
Chris@82 280 Tt = Tp + Ts;
Chris@82 281 T4F = T3r + T3s;
Chris@82 282 T4P = T3D + T3C;
Chris@82 283 TN = Tp - Ts;
Chris@82 284 T10 = TW - TZ;
Chris@82 285 T3t = T3r - T3s;
Chris@82 286 T3E = T3C - T3D;
Chris@82 287 T2B = TW + TZ;
Chris@82 288 }
Chris@82 289 T18 = T10 - T17;
Chris@82 290 T42 = T3t - T3w;
Chris@82 291 T43 = T3m - T3p;
Chris@82 292 T1n = T1f - T1m;
Chris@82 293 T2D = T2B - T2C;
Chris@82 294 T53 = T4F - T4G;
Chris@82 295 T52 = T4C - T4D;
Chris@82 296 T2A = T2y - T2z;
Chris@82 297 T1H = TK - TL;
Chris@82 298 T4R = T4P - T4Q;
Chris@82 299 T4O = T4M - T4N;
Chris@82 300 T1G = TN - TO;
Chris@82 301 T2O = Te - Tl;
Chris@82 302 T3I = T3E + T3H;
Chris@82 303 T2P = Tt - TA;
Chris@82 304 T3P = T3L + T3O;
Chris@82 305 T2I = T2y + T2z;
Chris@82 306 T2J = T2B + T2C;
Chris@82 307 T2K = T2I + T2J;
Chris@82 308 T1A = T1f + T1m;
Chris@82 309 T1B = T10 + T17;
Chris@82 310 T1C = T1A + T1B;
Chris@82 311 {
Chris@82 312 E Tm, TB, TM, TP;
Chris@82 313 Tm = Te + Tl;
Chris@82 314 TB = Tt + TA;
Chris@82 315 TC = Tm + TB;
Chris@82 316 T2w = Tm - TB;
Chris@82 317 {
Chris@82 318 E T3W, T3X, T4E, T4H;
Chris@82 319 T3W = T3L - T3O;
Chris@82 320 T3X = T3E - T3H;
Chris@82 321 T3Y = T3W + T3X;
Chris@82 322 T40 = T3W - T3X;
Chris@82 323 T4E = T4C + T4D;
Chris@82 324 T4H = T4F + T4G;
Chris@82 325 T4I = T4E + T4H;
Chris@82 326 T4K = T4E - T4H;
Chris@82 327 }
Chris@82 328 TM = TK + TL;
Chris@82 329 TP = TN + TO;
Chris@82 330 TQ = TM + TP;
Chris@82 331 TS = TM - TP;
Chris@82 332 {
Chris@82 333 E T3q, T3x, T4W, T4X;
Chris@82 334 T3q = T3m + T3p;
Chris@82 335 T3x = T3t + T3w;
Chris@82 336 T3y = T3q + T3x;
Chris@82 337 T3A = T3q - T3x;
Chris@82 338 T4W = T4M + T4N;
Chris@82 339 T4X = T4P + T4Q;
Chris@82 340 T4Y = T4W + T4X;
Chris@82 341 T50 = T4W - T4X;
Chris@82 342 }
Chris@82 343 }
Chris@82 344 }
/* Stores begin here. Rp[0] and Rm[0] are written without a twiddle
 * multiply; every other output is scaled by twiddle-derived factors
 * through an FMA/FNMS pair. */
Chris@82 345 Rp[0] = T7 + TC;
Chris@82 346 Rm[0] = T2H + T2K;
Chris@82 347 {
Chris@82 348 E T2t, T2q, T2u, T2p;
Chris@82 349 T2t = T1z + T1C;
Chris@82 350 T2p = TJ + TQ;
Chris@82 351 T2q = T2o * T2p;
Chris@82 352 T2u = T2s * T2p;
Chris@82 353 Rp[WS(rs, 5)] = FNMS(T2s, T2t, T2q);
Chris@82 354 Rm[WS(rs, 5)] = FMA(T2o, T2t, T2u);
Chris@82 355 }
Chris@82 356 {
Chris@82 357 E T5t, T5u, T5v, T5w;
Chris@82 358 T5t = T4B + T4I;
Chris@82 359 T5u = T2c * T5t;
Chris@82 360 T5v = T4V + T4Y;
Chris@82 361 T5w = T2c * T5v;
Chris@82 362 Ip[WS(rs, 2)] = FNMS(T2f, T5v, T5u);
Chris@82 363 Im[WS(rs, 2)] = FMA(T2f, T5t, T5w);
Chris@82 364 }
Chris@82 365 {
Chris@82 366 E T4v, T4w, T4z, T4A;
Chris@82 367 T4v = T3j + T3y;
Chris@82 368 T4w = T4u * T4v;
Chris@82 369 T4z = T3V + T3Y;
Chris@82 370 T4A = T4u * T4z;
Chris@82 371 Ip[WS(rs, 7)] = FNMS(T4y, T4z, T4w);
Chris@82 372 Im[WS(rs, 7)] = FMA(T4y, T4v, T4A);
Chris@82 373 }
Chris@82 374 {
Chris@82 375 E T3R, T4p, T49, T4i, T45, T4r, T4d, T4n;
Chris@82 376 {
Chris@82 377 E T3Q, T4h, T3B, T4g, T3z;
Chris@82 378 T3Q = FNMS(KP618033988, T3P, T3I);
Chris@82 379 T4h = FMA(KP618033988, T3I, T3P);
Chris@82 380 T3z = FNMS(KP250000000, T3y, T3j);
Chris@82 381 T3B = FNMS(KP559016994, T3A, T3z);
Chris@82 382 T4g = FMA(KP559016994, T3A, T3z);
Chris@82 383 T3R = FNMS(KP951056516, T3Q, T3B);
Chris@82 384 T4p = FMA(KP951056516, T4h, T4g);
Chris@82 385 T49 = FMA(KP951056516, T3Q, T3B);
Chris@82 386 T4i = FNMS(KP951056516, T4h, T4g);
Chris@82 387 }
Chris@82 388 {
Chris@82 389 E T44, T4m, T41, T4l, T3Z;
Chris@82 390 T44 = FNMS(KP618033988, T43, T42);
Chris@82 391 T4m = FMA(KP618033988, T42, T43);
Chris@82 392 T3Z = FNMS(KP250000000, T3Y, T3V);
Chris@82 393 T41 = FNMS(KP559016994, T40, T3Z);
Chris@82 394 T4l = FMA(KP559016994, T40, T3Z);
Chris@82 395 T45 = FMA(KP951056516, T44, T41);
Chris@82 396 T4r = FNMS(KP951056516, T4m, T4l);
Chris@82 397 T4d = FNMS(KP951056516, T44, T41);
Chris@82 398 T4n = FMA(KP951056516, T4m, T4l);
Chris@82 399 }
Chris@82 400 {
Chris@82 401 E T3S, T46, T4a, T4e;
Chris@82 402 T3S = TE * T3R;
Chris@82 403 Ip[WS(rs, 1)] = FNMS(TH, T45, T3S);
Chris@82 404 T46 = TE * T45;
Chris@82 405 Im[WS(rs, 1)] = FMA(TH, T3R, T46);
Chris@82 406 T4a = T48 * T49;
Chris@82 407 Ip[WS(rs, 3)] = FNMS(T4c, T4d, T4a);
Chris@82 408 T4e = T48 * T4d;
Chris@82 409 Im[WS(rs, 3)] = FMA(T4c, T49, T4e);
Chris@82 410 }
Chris@82 411 {
Chris@82 412 E T4j, T4o, T4q, T4s;
Chris@82 413 T4j = T4f * T4i;
Chris@82 414 Ip[WS(rs, 5)] = FNMS(T4k, T4n, T4j);
Chris@82 415 T4o = T4f * T4n;
Chris@82 416 Im[WS(rs, 5)] = FMA(T4k, T4i, T4o);
Chris@82 417 T4q = T1L * T4p;
Chris@82 418 Ip[WS(rs, 9)] = FNMS(T1N, T4r, T4q);
Chris@82 419 T4s = T1L * T4r;
Chris@82 420 Im[WS(rs, 9)] = FMA(T1N, T4p, T4s);
Chris@82 421 }
Chris@82 422 }
Chris@82 423 {
Chris@82 424 E T4T, T5n, T57, T5e, T55, T5r, T59, T5j;
Chris@82 425 {
Chris@82 426 E T4S, T5d, T4L, T5c, T4J;
Chris@82 427 T4S = FMA(KP618033988, T4R, T4O);
Chris@82 428 T5d = FNMS(KP618033988, T4O, T4R);
Chris@82 429 T4J = FNMS(KP250000000, T4I, T4B);
Chris@82 430 T4L = FMA(KP559016994, T4K, T4J);
Chris@82 431 T5c = FNMS(KP559016994, T4K, T4J);
Chris@82 432 T4T = FNMS(KP951056516, T4S, T4L);
Chris@82 433 T5n = FMA(KP951056516, T5d, T5c);
Chris@82 434 T57 = FMA(KP951056516, T4S, T4L);
Chris@82 435 T5e = FNMS(KP951056516, T5d, T5c);
Chris@82 436 }
Chris@82 437 {
Chris@82 438 E T54, T5i, T51, T5h, T4Z;
Chris@82 439 T54 = FMA(KP618033988, T53, T52);
Chris@82 440 T5i = FNMS(KP618033988, T52, T53);
Chris@82 441 T4Z = FNMS(KP250000000, T4Y, T4V);
Chris@82 442 T51 = FMA(KP559016994, T50, T4Z);
Chris@82 443 T5h = FNMS(KP559016994, T50, T4Z);
Chris@82 444 T55 = FMA(KP951056516, T54, T51);
Chris@82 445 T5r = FNMS(KP951056516, T5i, T5h);
Chris@82 446 T59 = FNMS(KP951056516, T54, T51);
Chris@82 447 T5j = FMA(KP951056516, T5i, T5h);
Chris@82 448 }
Chris@82 449 {
Chris@82 450 E T4U, T56, T58, T5a;
Chris@82 451 T4U = TD * T4T;
Chris@82 452 Ip[0] = FNMS(TG, T55, T4U);
Chris@82 453 T56 = TD * T55;
Chris@82 454 Im[0] = FMA(TG, T4T, T56);
Chris@82 455 T58 = T1V * T57;
Chris@82 456 Ip[WS(rs, 4)] = FNMS(T1X, T59, T58);
Chris@82 457 T5a = T1V * T59;
Chris@82 458 Im[WS(rs, 4)] = FMA(T1X, T57, T5a);
Chris@82 459 }
Chris@82 460 {
Chris@82 461 E T5f, T5k, T5o, T5s;
Chris@82 462 T5f = T5b * T5e;
Chris@82 463 Ip[WS(rs, 6)] = FNMS(T5g, T5j, T5f);
Chris@82 464 T5k = T5b * T5j;
Chris@82 465 Im[WS(rs, 6)] = FMA(T5g, T5e, T5k);
Chris@82 466 T5o = T5m * T5n;
Chris@82 467 Ip[WS(rs, 8)] = FNMS(T5q, T5r, T5o);
Chris@82 468 T5s = T5m * T5r;
Chris@82 469 Im[WS(rs, 8)] = FMA(T5q, T5n, T5s);
Chris@82 470 }
Chris@82 471 }
Chris@82 472 {
Chris@82 473 E T2Q, T38, T2N, T37, T2F, T3c, T2V, T34, T2L, T2M;
Chris@82 474 T2Q = FMA(KP618033988, T2P, T2O);
Chris@82 475 T38 = FNMS(KP618033988, T2O, T2P);
Chris@82 476 T2L = FNMS(KP250000000, T2K, T2H);
Chris@82 477 T2M = T2I - T2J;
Chris@82 478 T2N = FMA(KP559016994, T2M, T2L);
Chris@82 479 T37 = FNMS(KP559016994, T2M, T2L);
Chris@82 480 {
Chris@82 481 E T2E, T33, T2x, T32, T2v;
Chris@82 482 T2E = FMA(KP618033988, T2D, T2A);
Chris@82 483 T33 = FNMS(KP618033988, T2A, T2D);
Chris@82 484 T2v = FNMS(KP250000000, TC, T7);
Chris@82 485 T2x = FMA(KP559016994, T2w, T2v);
Chris@82 486 T32 = FNMS(KP559016994, T2w, T2v);
Chris@82 487 T2F = FMA(KP951056516, T2E, T2x);
Chris@82 488 T3c = FMA(KP951056516, T33, T32);
Chris@82 489 T2V = FNMS(KP951056516, T2E, T2x);
Chris@82 490 T34 = FNMS(KP951056516, T33, T32);
Chris@82 491 }
Chris@82 492 {
Chris@82 493 E T2G, T2S, T2R, T3d, T3g, T3f;
Chris@82 494 T2G = T29 * T2F;
Chris@82 495 T2S = T2b * T2F;
Chris@82 496 T2R = FNMS(KP951056516, T2Q, T2N);
Chris@82 497 Rp[WS(rs, 2)] = FNMS(T2b, T2R, T2G);
Chris@82 498 Rm[WS(rs, 2)] = FMA(T29, T2R, T2S);
Chris@82 499 T3d = T3b * T3c;
Chris@82 500 T3g = T3e * T3c;
Chris@82 501 T3f = FNMS(KP951056516, T38, T37);
Chris@82 502 Rp[WS(rs, 6)] = FNMS(T3e, T3f, T3d);
Chris@82 503 Rm[WS(rs, 6)] = FMA(T3b, T3f, T3g);
Chris@82 504 }
Chris@82 505 {
Chris@82 506 E T2W, T30, T2Z, T35, T3a, T39;
Chris@82 507 T2W = T2U * T2V;
Chris@82 508 T30 = T2Y * T2V;
Chris@82 509 T2Z = FMA(KP951056516, T2Q, T2N);
Chris@82 510 Rp[WS(rs, 8)] = FNMS(T2Y, T2Z, T2W);
Chris@82 511 Rm[WS(rs, 8)] = FMA(T2U, T2Z, T30);
Chris@82 512 T35 = T31 * T34;
Chris@82 513 T3a = T36 * T34;
Chris@82 514 T39 = FMA(KP951056516, T38, T37);
Chris@82 515 Rp[WS(rs, 4)] = FNMS(T36, T39, T35);
Chris@82 516 Rm[WS(rs, 4)] = FMA(T31, T39, T3a);
Chris@82 517 }
Chris@82 518 }
Chris@82 519 {
Chris@82 520 E T1I, T26, T1F, T25, T1p, T2h, T1P, T21, T1D, T1E;
Chris@82 521 T1I = FNMS(KP618033988, T1H, T1G);
Chris@82 522 T26 = FMA(KP618033988, T1G, T1H);
Chris@82 523 T1D = FNMS(KP250000000, T1C, T1z);
Chris@82 524 T1E = T1A - T1B;
Chris@82 525 T1F = FNMS(KP559016994, T1E, T1D);
Chris@82 526 T25 = FMA(KP559016994, T1E, T1D);
Chris@82 527 {
Chris@82 528 E T1o, T20, TT, T1Z, TR;
Chris@82 529 T1o = FNMS(KP618033988, T1n, T18);
Chris@82 530 T20 = FMA(KP618033988, T18, T1n);
Chris@82 531 TR = FNMS(KP250000000, TQ, TJ);
Chris@82 532 TT = FNMS(KP559016994, TS, TR);
Chris@82 533 T1Z = FMA(KP559016994, TS, TR);
Chris@82 534 T1p = FMA(KP951056516, T1o, TT);
Chris@82 535 T2h = FMA(KP951056516, T20, T1Z);
Chris@82 536 T1P = FNMS(KP951056516, T1o, TT);
Chris@82 537 T21 = FNMS(KP951056516, T20, T1Z);
Chris@82 538 }
Chris@82 539 {
Chris@82 540 E T1q, T1K, T1J, T2i, T2m, T2l;
Chris@82 541 T1q = TI * T1p;
Chris@82 542 T1K = T1s * T1p;
Chris@82 543 T1J = FNMS(KP951056516, T1I, T1F);
Chris@82 544 Rp[WS(rs, 1)] = FNMS(T1s, T1J, T1q);
Chris@82 545 Rm[WS(rs, 1)] = FMA(TI, T1J, T1K);
Chris@82 546 T2i = T2g * T2h;
Chris@82 547 T2m = T2k * T2h;
Chris@82 548 T2l = FNMS(KP951056516, T26, T25);
Chris@82 549 Rp[WS(rs, 7)] = FNMS(T2k, T2l, T2i);
Chris@82 550 Rm[WS(rs, 7)] = FMA(T2g, T2l, T2m);
Chris@82 551 }
Chris@82 552 {
Chris@82 553 E T1Q, T1U, T1T, T22, T28, T27;
Chris@82 554 T1Q = T1O * T1P;
Chris@82 555 T1U = T1S * T1P;
Chris@82 556 T1T = FMA(KP951056516, T1I, T1F);
Chris@82 557 Rp[WS(rs, 9)] = FNMS(T1S, T1T, T1Q);
Chris@82 558 Rm[WS(rs, 9)] = FMA(T1O, T1T, T1U);
Chris@82 559 T22 = T1Y * T21;
Chris@82 560 T28 = T24 * T21;
Chris@82 561 T27 = FMA(KP951056516, T26, T25);
Chris@82 562 Rp[WS(rs, 3)] = FNMS(T24, T27, T22);
Chris@82 563 Rm[WS(rs, 3)] = FMA(T1Y, T27, T28);
Chris@82 564 }
Chris@82 565 }
Chris@82 566 }
Chris@82 567 }
Chris@82 568 }
Chris@82 569 }
Chris@82 570
/*
 * Twiddle-instruction table referenced by desc below: four TW_CEXP entries
 * followed by a TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry yields one complex twiddle
 * (two reals), which would match the eight W[] values hc2cb2_20 reads per
 * iteration -- confirm against the tw_instr definition.
 */
Chris@82 571 static const tw_instr twinstr[] = {
Chris@82 572 {TW_CEXP, 1, 1},
Chris@82 573 {TW_CEXP, 1, 3},
Chris@82 574 {TW_CEXP, 1, 9},
Chris@82 575 {TW_CEXP, 1, 19},
Chris@82 576 {TW_NEXT, 1, 0}
Chris@82 577 };
Chris@82 578
/*
 * Descriptor handed to the planner for this codelet. It bundles the value 20
 * (the generator was run with -n 20), the codelet name string, the twinstr
 * table and &GENUS. The first three numbers of {136, 58, 140, 0} equal the
 * additions / multiplications / fused multiply-adds quoted in the generator
 * comment for this FMA variant.
 * NOTE(review): field meanings are inferred from the values; confirm against
 * the hc2c_desc declaration.
 */
Chris@82 579 static const hc2c_desc desc = { 20, "hc2cb2_20", twinstr, &GENUS, {136, 58, 140, 0} };
Chris@82 580
/*
 * Registration entry point: passes hc2cb2_20 and its descriptor to
 * X(khc2c_register) for planner p, together with the HC2C_VIA_RDFT flag.
 */
Chris@82 581 void X(codelet_hc2cb2_20) (planner *p) {
Chris@82 582 X(khc2c_register) (p, hc2cb2_20, &desc, HC2C_VIA_RDFT);
Chris@82 583 }
Chris@82 584 #else
Chris@82 585
Chris@82 586 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 20 -dif -name hc2cb2_20 -include rdft/scalar/hc2cb.h */
Chris@82 587
Chris@82 588 /*
Chris@82 589 * This function contains 276 FP additions, 164 FP multiplications,
Chris@82 590 * (or, 204 additions, 92 multiplications, 72 fused multiply/add),
Chris@82 591 * 137 stack variables, 4 constants, and 80 memory accesses
Chris@82 592 */
Chris@82 593 #include "rdft/scalar/hc2cb.h"
Chris@82 594
Chris@82 595 static void hc2cb2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 596 {
Chris@82 597 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 598 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 599 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 600 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 601 {
Chris@82 602 INT m;
Chris@82 603 for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) {
Chris@82 604 E TD, TG, TE, TH, TJ, T1t, T27, T25, T1T, T1R, T1V, T2j, T2Z, T21, T2X;
Chris@82 605 E T2T, T2n, T2P, T3V, T41, T3R, T3X, T29, T2c, T4H, T4L, T1L, T1M, T1N, T2d;
Chris@82 606 E T4R, T1P, T4P, T49, T2N, T2f, T47, T2L;
Chris@82 607 {
Chris@82 608 E T1U, T2l, T1Z, T2i, T1S, T2m, T20, T2h;
Chris@82 609 {
Chris@82 610 E TF, T1s, TI, T1r;
Chris@82 611 TD = W[0];
Chris@82 612 TG = W[1];
Chris@82 613 TE = W[2];
Chris@82 614 TH = W[3];
Chris@82 615 TF = TD * TE;
Chris@82 616 T1s = TG * TE;
Chris@82 617 TI = TG * TH;
Chris@82 618 T1r = TD * TH;
Chris@82 619 TJ = TF + TI;
Chris@82 620 T1t = T1r - T1s;
Chris@82 621 T27 = T1r + T1s;
Chris@82 622 T25 = TF - TI;
Chris@82 623 T1T = W[5];
Chris@82 624 T1U = TH * T1T;
Chris@82 625 T2l = TD * T1T;
Chris@82 626 T1Z = TE * T1T;
Chris@82 627 T2i = TG * T1T;
Chris@82 628 T1R = W[4];
Chris@82 629 T1S = TE * T1R;
Chris@82 630 T2m = TG * T1R;
Chris@82 631 T20 = TH * T1R;
Chris@82 632 T2h = TD * T1R;
Chris@82 633 }
Chris@82 634 T1V = T1S + T1U;
Chris@82 635 T2j = T2h - T2i;
Chris@82 636 T2Z = T1Z + T20;
Chris@82 637 T21 = T1Z - T20;
Chris@82 638 T2X = T1S - T1U;
Chris@82 639 T2T = T2l - T2m;
Chris@82 640 T2n = T2l + T2m;
Chris@82 641 T2P = T2h + T2i;
Chris@82 642 {
Chris@82 643 E T3T, T3U, T3P, T3Q;
Chris@82 644 T3T = TJ * T1T;
Chris@82 645 T3U = T1t * T1R;
Chris@82 646 T3V = T3T - T3U;
Chris@82 647 T41 = T3T + T3U;
Chris@82 648 T3P = TJ * T1R;
Chris@82 649 T3Q = T1t * T1T;
Chris@82 650 T3R = T3P + T3Q;
Chris@82 651 T3X = T3P - T3Q;
Chris@82 652 {
Chris@82 653 E T26, T28, T2a, T2b;
Chris@82 654 T26 = T25 * T1R;
Chris@82 655 T28 = T27 * T1T;
Chris@82 656 T29 = T26 + T28;
Chris@82 657 T2a = T25 * T1T;
Chris@82 658 T2b = T27 * T1R;
Chris@82 659 T2c = T2a - T2b;
Chris@82 660 T4H = T26 - T28;
Chris@82 661 T4L = T2a + T2b;
Chris@82 662 T1L = W[6];
Chris@82 663 T1M = W[7];
Chris@82 664 T1N = FMA(TD, T1L, TG * T1M);
Chris@82 665 T2d = FMA(T29, T1L, T2c * T1M);
Chris@82 666 T4R = FNMS(T1t, T1L, TJ * T1M);
Chris@82 667 T1P = FNMS(TG, T1L, TD * T1M);
Chris@82 668 T4P = FMA(TJ, T1L, T1t * T1M);
Chris@82 669 T49 = FNMS(T27, T1L, T25 * T1M);
Chris@82 670 T2N = FNMS(TH, T1L, TE * T1M);
Chris@82 671 T2f = FNMS(T2c, T1L, T29 * T1M);
Chris@82 672 T47 = FMA(T25, T1L, T27 * T1M);
Chris@82 673 T2L = FMA(TE, T1L, TH * T1M);
Chris@82 674 }
Chris@82 675 }
Chris@82 676 }
Chris@82 677 {
Chris@82 678 E T7, T4i, T4x, TK, T1D, T3i, T3E, T2D, T19, T3L, T3M, T1o, T2x, T4C, T4B;
Chris@82 679 E T2u, T1v, T4r, T4o, T1u, T2H, T37, T2I, T3e, T3p, T3w, T3x, Tm, TB, TC;
Chris@82 680 E T4u, T4v, T4y, T2A, T2B, T2E, T1E, T1F, T1G, T4d, T4g, T4j, T3F, T3G, T3H;
Chris@82 681 E TN, TQ, TR, T48, T4a;
Chris@82 682 {
Chris@82 683 E T3, T3g, T1z, T3C, T6, T3D, T1C, T3h;
Chris@82 684 {
Chris@82 685 E T1, T2, T1x, T1y;
Chris@82 686 T1 = Rp[0];
Chris@82 687 T2 = Rm[WS(rs, 9)];
Chris@82 688 T3 = T1 + T2;
Chris@82 689 T3g = T1 - T2;
Chris@82 690 T1x = Ip[0];
Chris@82 691 T1y = Im[WS(rs, 9)];
Chris@82 692 T1z = T1x - T1y;
Chris@82 693 T3C = T1x + T1y;
Chris@82 694 }
Chris@82 695 {
Chris@82 696 E T4, T5, T1A, T1B;
Chris@82 697 T4 = Rp[WS(rs, 5)];
Chris@82 698 T5 = Rm[WS(rs, 4)];
Chris@82 699 T6 = T4 + T5;
Chris@82 700 T3D = T4 - T5;
Chris@82 701 T1A = Ip[WS(rs, 5)];
Chris@82 702 T1B = Im[WS(rs, 4)];
Chris@82 703 T1C = T1A - T1B;
Chris@82 704 T3h = T1A + T1B;
Chris@82 705 }
Chris@82 706 T7 = T3 + T6;
Chris@82 707 T4i = T3g - T3h;
Chris@82 708 T4x = T3D + T3C;
Chris@82 709 TK = T3 - T6;
Chris@82 710 T1D = T1z - T1C;
Chris@82 711 T3i = T3g + T3h;
Chris@82 712 T3E = T3C - T3D;
Chris@82 713 T2D = T1z + T1C;
Chris@82 714 }
Chris@82 715 {
Chris@82 716 E Te, T4b, T4m, TL, T11, T33, T3l, T2s, TA, T4f, T4q, TP, T1n, T3d, T3v;
Chris@82 717 E T2w, Tl, T4c, T4n, TM, T18, T36, T3o, T2t, Tt, T4e, T4p, TO, T1g, T3a;
Chris@82 718 E T3s, T2v;
Chris@82 719 {
Chris@82 720 E Ta, T3j, TX, T31, Td, T32, T10, T3k;
Chris@82 721 {
Chris@82 722 E T8, T9, TV, TW;
Chris@82 723 T8 = Rp[WS(rs, 4)];
Chris@82 724 T9 = Rm[WS(rs, 5)];
Chris@82 725 Ta = T8 + T9;
Chris@82 726 T3j = T8 - T9;
Chris@82 727 TV = Ip[WS(rs, 4)];
Chris@82 728 TW = Im[WS(rs, 5)];
Chris@82 729 TX = TV - TW;
Chris@82 730 T31 = TV + TW;
Chris@82 731 }
Chris@82 732 {
Chris@82 733 E Tb, Tc, TY, TZ;
Chris@82 734 Tb = Rp[WS(rs, 9)];
Chris@82 735 Tc = Rm[0];
Chris@82 736 Td = Tb + Tc;
Chris@82 737 T32 = Tb - Tc;
Chris@82 738 TY = Ip[WS(rs, 9)];
Chris@82 739 TZ = Im[0];
Chris@82 740 T10 = TY - TZ;
Chris@82 741 T3k = TY + TZ;
Chris@82 742 }
Chris@82 743 Te = Ta + Td;
Chris@82 744 T4b = T3j - T3k;
Chris@82 745 T4m = T32 + T31;
Chris@82 746 TL = Ta - Td;
Chris@82 747 T11 = TX - T10;
Chris@82 748 T33 = T31 - T32;
Chris@82 749 T3l = T3j + T3k;
Chris@82 750 T2s = TX + T10;
Chris@82 751 }
Chris@82 752 {
Chris@82 753 E Tw, T3t, T1j, T3c, Tz, T3b, T1m, T3u;
Chris@82 754 {
Chris@82 755 E Tu, Tv, T1h, T1i;
Chris@82 756 Tu = Rm[WS(rs, 7)];
Chris@82 757 Tv = Rp[WS(rs, 2)];
Chris@82 758 Tw = Tu + Tv;
Chris@82 759 T3t = Tu - Tv;
Chris@82 760 T1h = Ip[WS(rs, 2)];
Chris@82 761 T1i = Im[WS(rs, 7)];
Chris@82 762 T1j = T1h - T1i;
Chris@82 763 T3c = T1h + T1i;
Chris@82 764 }
Chris@82 765 {
Chris@82 766 E Tx, Ty, T1k, T1l;
Chris@82 767 Tx = Rm[WS(rs, 2)];
Chris@82 768 Ty = Rp[WS(rs, 7)];
Chris@82 769 Tz = Tx + Ty;
Chris@82 770 T3b = Tx - Ty;
Chris@82 771 T1k = Ip[WS(rs, 7)];
Chris@82 772 T1l = Im[WS(rs, 2)];
Chris@82 773 T1m = T1k - T1l;
Chris@82 774 T3u = T1k + T1l;
Chris@82 775 }
Chris@82 776 TA = Tw + Tz;
Chris@82 777 T4f = T3t + T3u;
Chris@82 778 T4q = T3b - T3c;
Chris@82 779 TP = Tw - Tz;
Chris@82 780 T1n = T1j - T1m;
Chris@82 781 T3d = T3b + T3c;
Chris@82 782 T3v = T3t - T3u;
Chris@82 783 T2w = T1j + T1m;
Chris@82 784 }
Chris@82 785 {
Chris@82 786 E Th, T3m, T14, T35, Tk, T34, T17, T3n;
Chris@82 787 {
Chris@82 788 E Tf, Tg, T12, T13;
Chris@82 789 Tf = Rm[WS(rs, 3)];
Chris@82 790 Tg = Rp[WS(rs, 6)];
Chris@82 791 Th = Tf + Tg;
Chris@82 792 T3m = Tf - Tg;
Chris@82 793 T12 = Ip[WS(rs, 6)];
Chris@82 794 T13 = Im[WS(rs, 3)];
Chris@82 795 T14 = T12 - T13;
Chris@82 796 T35 = T12 + T13;
Chris@82 797 }
Chris@82 798 {
Chris@82 799 E Ti, Tj, T15, T16;
Chris@82 800 Ti = Rp[WS(rs, 1)];
Chris@82 801 Tj = Rm[WS(rs, 8)];
Chris@82 802 Tk = Ti + Tj;
Chris@82 803 T34 = Ti - Tj;
Chris@82 804 T15 = Ip[WS(rs, 1)];
Chris@82 805 T16 = Im[WS(rs, 8)];
Chris@82 806 T17 = T15 - T16;
Chris@82 807 T3n = T15 + T16;
Chris@82 808 }
Chris@82 809 Tl = Th + Tk;
Chris@82 810 T4c = T3m - T3n;
Chris@82 811 T4n = T34 - T35;
Chris@82 812 TM = Th - Tk;
Chris@82 813 T18 = T14 - T17;
Chris@82 814 T36 = T34 + T35;
Chris@82 815 T3o = T3m + T3n;
Chris@82 816 T2t = T14 + T17;
Chris@82 817 }
Chris@82 818 {
Chris@82 819 E Tp, T3q, T1c, T38, Ts, T39, T1f, T3r;
Chris@82 820 {
Chris@82 821 E Tn, To, T1a, T1b;
Chris@82 822 Tn = Rp[WS(rs, 8)];
Chris@82 823 To = Rm[WS(rs, 1)];
Chris@82 824 Tp = Tn + To;
Chris@82 825 T3q = Tn - To;
Chris@82 826 T1a = Ip[WS(rs, 8)];
Chris@82 827 T1b = Im[WS(rs, 1)];
Chris@82 828 T1c = T1a - T1b;
Chris@82 829 T38 = T1a + T1b;
Chris@82 830 }
Chris@82 831 {
Chris@82 832 E Tq, Tr, T1d, T1e;
Chris@82 833 Tq = Rm[WS(rs, 6)];
Chris@82 834 Tr = Rp[WS(rs, 3)];
Chris@82 835 Ts = Tq + Tr;
Chris@82 836 T39 = Tq - Tr;
Chris@82 837 T1d = Ip[WS(rs, 3)];
Chris@82 838 T1e = Im[WS(rs, 6)];
Chris@82 839 T1f = T1d - T1e;
Chris@82 840 T3r = T1d + T1e;
Chris@82 841 }
Chris@82 842 Tt = Tp + Ts;
Chris@82 843 T4e = T3q + T3r;
Chris@82 844 T4p = T39 + T38;
Chris@82 845 TO = Tp - Ts;
Chris@82 846 T1g = T1c - T1f;
Chris@82 847 T3a = T38 - T39;
Chris@82 848 T3s = T3q - T3r;
Chris@82 849 T2v = T1c + T1f;
Chris@82 850 }
Chris@82 851 T19 = T11 - T18;
Chris@82 852 T3L = T3l - T3o;
Chris@82 853 T3M = T3s - T3v;
Chris@82 854 T1o = T1g - T1n;
Chris@82 855 T2x = T2v - T2w;
Chris@82 856 T4C = T4e - T4f;
Chris@82 857 T4B = T4b - T4c;
Chris@82 858 T2u = T2s - T2t;
Chris@82 859 T1v = TO - TP;
Chris@82 860 T4r = T4p - T4q;
Chris@82 861 T4o = T4m - T4n;
Chris@82 862 T1u = TL - TM;
Chris@82 863 T2H = Te - Tl;
Chris@82 864 T37 = T33 + T36;
Chris@82 865 T2I = Tt - TA;
Chris@82 866 T3e = T3a + T3d;
Chris@82 867 T3p = T3l + T3o;
Chris@82 868 T3w = T3s + T3v;
Chris@82 869 T3x = T3p + T3w;
Chris@82 870 Tm = Te + Tl;
Chris@82 871 TB = Tt + TA;
Chris@82 872 TC = Tm + TB;
Chris@82 873 T4u = T4m + T4n;
Chris@82 874 T4v = T4p + T4q;
Chris@82 875 T4y = T4u + T4v;
Chris@82 876 T2A = T2s + T2t;
Chris@82 877 T2B = T2v + T2w;
Chris@82 878 T2E = T2A + T2B;
Chris@82 879 T1E = T11 + T18;
Chris@82 880 T1F = T1g + T1n;
Chris@82 881 T1G = T1E + T1F;
Chris@82 882 T4d = T4b + T4c;
Chris@82 883 T4g = T4e + T4f;
Chris@82 884 T4j = T4d + T4g;
Chris@82 885 T3F = T33 - T36;
Chris@82 886 T3G = T3a - T3d;
Chris@82 887 T3H = T3F + T3G;
Chris@82 888 TN = TL + TM;
Chris@82 889 TQ = TO + TP;
Chris@82 890 TR = TN + TQ;
Chris@82 891 }
Chris@82 892 Rp[0] = T7 + TC;
Chris@82 893 Rm[0] = T2D + T2E;
Chris@82 894 {
Chris@82 895 E T2k, T2o, T4T, T4U;
Chris@82 896 T2k = TK + TR;
Chris@82 897 T2o = T1D + T1G;
Chris@82 898 Rp[WS(rs, 5)] = FNMS(T2n, T2o, T2j * T2k);
Chris@82 899 Rm[WS(rs, 5)] = FMA(T2n, T2k, T2j * T2o);
Chris@82 900 T4T = T4i + T4j;
Chris@82 901 T4U = T4x + T4y;
Chris@82 902 Ip[WS(rs, 2)] = FNMS(T2c, T4U, T29 * T4T);
Chris@82 903 Im[WS(rs, 2)] = FMA(T29, T4U, T2c * T4T);
Chris@82 904 }
Chris@82 905 T48 = T3i + T3x;
Chris@82 906 T4a = T3E + T3H;
Chris@82 907 Ip[WS(rs, 7)] = FNMS(T49, T4a, T47 * T48);
Chris@82 908 Im[WS(rs, 7)] = FMA(T47, T4a, T49 * T48);
Chris@82 909 {
Chris@82 910 E T2y, T2J, T2V, T2R, T2G, T2U, T2r, T2Q;
Chris@82 911 T2y = FMA(KP951056516, T2u, KP587785252 * T2x);
Chris@82 912 T2J = FMA(KP951056516, T2H, KP587785252 * T2I);
Chris@82 913 T2V = FNMS(KP951056516, T2I, KP587785252 * T2H);
Chris@82 914 T2R = FNMS(KP951056516, T2x, KP587785252 * T2u);
Chris@82 915 {
Chris@82 916 E T2C, T2F, T2p, T2q;
Chris@82 917 T2C = KP559016994 * (T2A - T2B);
Chris@82 918 T2F = FNMS(KP250000000, T2E, T2D);
Chris@82 919 T2G = T2C + T2F;
Chris@82 920 T2U = T2F - T2C;
Chris@82 921 T2p = KP559016994 * (Tm - TB);
Chris@82 922 T2q = FNMS(KP250000000, TC, T7);
Chris@82 923 T2r = T2p + T2q;
Chris@82 924 T2Q = T2q - T2p;
Chris@82 925 }
Chris@82 926 {
Chris@82 927 E T2z, T2K, T2Y, T30;
Chris@82 928 T2z = T2r + T2y;
Chris@82 929 T2K = T2G - T2J;
Chris@82 930 Rp[WS(rs, 2)] = FNMS(T27, T2K, T25 * T2z);
Chris@82 931 Rm[WS(rs, 2)] = FMA(T27, T2z, T25 * T2K);
Chris@82 932 T2Y = T2Q - T2R;
Chris@82 933 T30 = T2V + T2U;
Chris@82 934 Rp[WS(rs, 6)] = FNMS(T2Z, T30, T2X * T2Y);
Chris@82 935 Rm[WS(rs, 6)] = FMA(T2Z, T2Y, T2X * T30);
Chris@82 936 }
Chris@82 937 {
Chris@82 938 E T2M, T2O, T2S, T2W;
Chris@82 939 T2M = T2r - T2y;
Chris@82 940 T2O = T2J + T2G;
Chris@82 941 Rp[WS(rs, 8)] = FNMS(T2N, T2O, T2L * T2M);
Chris@82 942 Rm[WS(rs, 8)] = FMA(T2N, T2M, T2L * T2O);
Chris@82 943 T2S = T2Q + T2R;
Chris@82 944 T2W = T2U - T2V;
Chris@82 945 Rp[WS(rs, 4)] = FNMS(T2T, T2W, T2P * T2S);
Chris@82 946 Rm[WS(rs, 4)] = FMA(T2T, T2S, T2P * T2W);
Chris@82 947 }
Chris@82 948 }
Chris@82 949 {
Chris@82 950 E T4s, T4D, T4N, T4I, T4A, T4M, T4l, T4J;
Chris@82 951 T4s = FMA(KP951056516, T4o, KP587785252 * T4r);
Chris@82 952 T4D = FMA(KP951056516, T4B, KP587785252 * T4C);
Chris@82 953 T4N = FNMS(KP951056516, T4C, KP587785252 * T4B);
Chris@82 954 T4I = FNMS(KP951056516, T4r, KP587785252 * T4o);
Chris@82 955 {
Chris@82 956 E T4w, T4z, T4h, T4k;
Chris@82 957 T4w = KP559016994 * (T4u - T4v);
Chris@82 958 T4z = FNMS(KP250000000, T4y, T4x);
Chris@82 959 T4A = T4w + T4z;
Chris@82 960 T4M = T4z - T4w;
Chris@82 961 T4h = KP559016994 * (T4d - T4g);
Chris@82 962 T4k = FNMS(KP250000000, T4j, T4i);
Chris@82 963 T4l = T4h + T4k;
Chris@82 964 T4J = T4k - T4h;
Chris@82 965 }
Chris@82 966 {
Chris@82 967 E T4t, T4E, T4Q, T4S;
Chris@82 968 T4t = T4l - T4s;
Chris@82 969 T4E = T4A + T4D;
Chris@82 970 Ip[0] = FNMS(TG, T4E, TD * T4t);
Chris@82 971 Im[0] = FMA(TD, T4E, TG * T4t);
Chris@82 972 T4Q = T4J - T4I;
Chris@82 973 T4S = T4M + T4N;
Chris@82 974 Ip[WS(rs, 8)] = FNMS(T4R, T4S, T4P * T4Q);
Chris@82 975 Im[WS(rs, 8)] = FMA(T4P, T4S, T4R * T4Q);
Chris@82 976 }
Chris@82 977 {
Chris@82 978 E T4F, T4G, T4K, T4O;
Chris@82 979 T4F = T4s + T4l;
Chris@82 980 T4G = T4A - T4D;
Chris@82 981 Ip[WS(rs, 4)] = FNMS(T1T, T4G, T1R * T4F);
Chris@82 982 Im[WS(rs, 4)] = FMA(T1R, T4G, T1T * T4F);
Chris@82 983 T4K = T4I + T4J;
Chris@82 984 T4O = T4M - T4N;
Chris@82 985 Ip[WS(rs, 6)] = FNMS(T4L, T4O, T4H * T4K);
Chris@82 986 Im[WS(rs, 6)] = FMA(T4H, T4O, T4L * T4K);
Chris@82 987 }
Chris@82 988 }
Chris@82 989 {
Chris@82 990 E T1p, T1w, T22, T1X, T1J, T23, TU, T1W;
Chris@82 991 T1p = FNMS(KP951056516, T1o, KP587785252 * T19);
Chris@82 992 T1w = FNMS(KP951056516, T1v, KP587785252 * T1u);
Chris@82 993 T22 = FMA(KP951056516, T1u, KP587785252 * T1v);
Chris@82 994 T1X = FMA(KP951056516, T19, KP587785252 * T1o);
Chris@82 995 {
Chris@82 996 E T1H, T1I, TS, TT;
Chris@82 997 T1H = FNMS(KP250000000, T1G, T1D);
Chris@82 998 T1I = KP559016994 * (T1E - T1F);
Chris@82 999 T1J = T1H - T1I;
Chris@82 1000 T23 = T1I + T1H;
Chris@82 1001 TS = FNMS(KP250000000, TR, TK);
Chris@82 1002 TT = KP559016994 * (TN - TQ);
Chris@82 1003 TU = TS - TT;
Chris@82 1004 T1W = TT + TS;
Chris@82 1005 }
Chris@82 1006 {
Chris@82 1007 E T1q, T1K, T2e, T2g;
Chris@82 1008 T1q = TU - T1p;
Chris@82 1009 T1K = T1w + T1J;
Chris@82 1010 Rp[WS(rs, 1)] = FNMS(T1t, T1K, TJ * T1q);
Chris@82 1011 Rm[WS(rs, 1)] = FMA(T1t, T1q, TJ * T1K);
Chris@82 1012 T2e = T1W + T1X;
Chris@82 1013 T2g = T23 - T22;
Chris@82 1014 Rp[WS(rs, 7)] = FNMS(T2f, T2g, T2d * T2e);
Chris@82 1015 Rm[WS(rs, 7)] = FMA(T2f, T2e, T2d * T2g);
Chris@82 1016 }
Chris@82 1017 {
Chris@82 1018 E T1O, T1Q, T1Y, T24;
Chris@82 1019 T1O = TU + T1p;
Chris@82 1020 T1Q = T1J - T1w;
Chris@82 1021 Rp[WS(rs, 9)] = FNMS(T1P, T1Q, T1N * T1O);
Chris@82 1022 Rm[WS(rs, 9)] = FMA(T1P, T1O, T1N * T1Q);
Chris@82 1023 T1Y = T1W - T1X;
Chris@82 1024 T24 = T22 + T23;
Chris@82 1025 Rp[WS(rs, 3)] = FNMS(T21, T24, T1V * T1Y);
Chris@82 1026 Rm[WS(rs, 3)] = FMA(T21, T1Y, T1V * T24);
Chris@82 1027 }
Chris@82 1028 }
Chris@82 1029 {
Chris@82 1030 E T3f, T3N, T43, T3Z, T3K, T42, T3A, T3Y;
Chris@82 1031 T3f = FNMS(KP951056516, T3e, KP587785252 * T37);
Chris@82 1032 T3N = FNMS(KP951056516, T3M, KP587785252 * T3L);
Chris@82 1033 T43 = FMA(KP951056516, T3L, KP587785252 * T3M);
Chris@82 1034 T3Z = FMA(KP951056516, T37, KP587785252 * T3e);
Chris@82 1035 {
Chris@82 1036 E T3I, T3J, T3y, T3z;
Chris@82 1037 T3I = FNMS(KP250000000, T3H, T3E);
Chris@82 1038 T3J = KP559016994 * (T3F - T3G);
Chris@82 1039 T3K = T3I - T3J;
Chris@82 1040 T42 = T3J + T3I;
Chris@82 1041 T3y = FNMS(KP250000000, T3x, T3i);
Chris@82 1042 T3z = KP559016994 * (T3p - T3w);
Chris@82 1043 T3A = T3y - T3z;
Chris@82 1044 T3Y = T3z + T3y;
Chris@82 1045 }
Chris@82 1046 {
Chris@82 1047 E T3B, T3O, T45, T46;
Chris@82 1048 T3B = T3f + T3A;
Chris@82 1049 T3O = T3K - T3N;
Chris@82 1050 Ip[WS(rs, 1)] = FNMS(TH, T3O, TE * T3B);
Chris@82 1051 Im[WS(rs, 1)] = FMA(TE, T3O, TH * T3B);
Chris@82 1052 T45 = T3Z + T3Y;
Chris@82 1053 T46 = T42 - T43;
Chris@82 1054 Ip[WS(rs, 9)] = FNMS(T1M, T46, T1L * T45);
Chris@82 1055 Im[WS(rs, 9)] = FMA(T1L, T46, T1M * T45);
Chris@82 1056 }
Chris@82 1057 {
Chris@82 1058 E T3S, T3W, T40, T44;
Chris@82 1059 T3S = T3A - T3f;
Chris@82 1060 T3W = T3K + T3N;
Chris@82 1061 Ip[WS(rs, 3)] = FNMS(T3V, T3W, T3R * T3S);
Chris@82 1062 Im[WS(rs, 3)] = FMA(T3R, T3W, T3V * T3S);
Chris@82 1063 T40 = T3Y - T3Z;
Chris@82 1064 T44 = T42 + T43;
Chris@82 1065 Ip[WS(rs, 5)] = FNMS(T41, T44, T3X * T40);
Chris@82 1066 Im[WS(rs, 5)] = FMA(T3X, T44, T41 * T40);
Chris@82 1067 }
Chris@82 1068 }
Chris@82 1069 }
Chris@82 1070 }
Chris@82 1071 }
Chris@82 1072 }
Chris@82 1073
/*
 * Twiddle-factor instruction list for this codelet: four TW_CEXP entries
 * whose last field is 1, 3, 9 and 19, followed by a closing TW_NEXT entry.
 * NOTE(review): the generator command line visible near the top of the file
 * uses -twiddle-log3, so presumably only these exponents are stored and the
 * remaining twiddles are derived inside the codelet -- confirm against the
 * tw_instr definition.
 */
Chris@82 1074 static const tw_instr twinstr[] = {
Chris@82 1075 {TW_CEXP, 1, 1},
Chris@82 1076 {TW_CEXP, 1, 3},
Chris@82 1077 {TW_CEXP, 1, 9},
Chris@82 1078 {TW_CEXP, 1, 19},
Chris@82 1079 {TW_NEXT, 1, 0}
Chris@82 1080 };
Chris@82 1081
/*
 * Descriptor passed to the planner when this codelet is registered. It
 * bundles the value 20 (the same number as the -n 20 generator option), the
 * codelet name string, the twinstr list, and &GENUS.
 * NOTE(review): the trailing {204, 92, 72, 0} is presumably the operation
 * count (additions, multiplications, fused multiply-adds, other); 204 + 72
 * equals the 276 FP additions quoted in the file header -- confirm against
 * the hc2c_desc declaration.
 */
Chris@82 1082 static const hc2c_desc desc = { 20, "hc2cb2_20", twinstr, &GENUS, {204, 92, 72, 0} };
Chris@82 1083
/*
 * Registration entry point: passes the hc2cb2_20 codelet function and its
 * descriptor to the planner `p` through X(khc2c_register), tagged with
 * HC2C_VIA_RDFT.
 */
Chris@82 1084 void X(codelet_hc2cb2_20) (planner *p) {
Chris@82 1085 X(khc2c_register) (p, hc2cb2_20, &desc, HC2C_VIA_RDFT);
Chris@82 1086 }
Chris@82 1087 #endif