annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cb_20.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:54 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cb_20 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 246 FP additions, 148 FP multiplications,
Chris@82 32 * (or, 136 additions, 38 multiplications, 110 fused multiply/add),
Chris@82 33 * 91 stack variables, 4 constants, and 80 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cb_20 -- FMA variant (this branch is compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated butterfly; the generator command line preceding this
 * function gives "-n 20 -dif -sign 1".
 *
 * Rp, Ip, Rm, Im: data arrays, each read at WS(rs, k) for k = 0..9 and then
 *                 overwritten at the same 10 positions (all data loads of an
 *                 iteration precede its first store).
 * W:              twiddle table; W[0]..W[37] are read per iteration.  W is
 *                 first offset by (mb - 1) * 38 and then advanced by 38 per
 *                 iteration.
 * rs:             stride handed to WS() for element indexing.
 * mb, me:         the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms:             per-iteration step: Rp and Ip move forward by ms, Rm and Im
 *                 move backward by ms.
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * defined outside this file.  The comments below assume FMA(a, b, c) is
 * a * b + c and FNMS(a, b, c) is c - a * b -- confirm against the headers.
 */
Chris@82 37 static void hc2cb_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
/* Numerically: sin(2*pi/5), sqrt(5)/4, 1/4 and (sqrt(5)-1)/2. */
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 42 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 43 {
Chris@82 44 INT m;
/* One butterfly per value of m; the pointer and twiddle updates are in the loop header. */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) {
Chris@82 46 E T7, T4e, T4z, TE, T1t, T2W, T3z, T2l, T13, T3G, T3H, T1i, T2g, T4H, T4G;
Chris@82 47 E T2d, T1B, T4u, T4r, T1A, T2s, T3l, T2t, T3s, T2m, T2n, T2o, T1u, T1v, T1w;
Chris@82 48 E TC, T29, T3C, T3E, T4l, T4n, TL, TN, T3b, T3d, T4C, T4E;
/* Load Rp[0]/Rm[9], Rp[5]/Rm[4] and the matching Ip/Im entries; form their sums and differences. */
Chris@82 49 {
Chris@82 50 E T3, T2U, T1p, T3x, T6, T3y, T1s, T2V;
Chris@82 51 {
Chris@82 52 E T1, T2, T1n, T1o;
Chris@82 53 T1 = Rp[0];
Chris@82 54 T2 = Rm[WS(rs, 9)];
Chris@82 55 T3 = T1 + T2;
Chris@82 56 T2U = T1 - T2;
Chris@82 57 T1n = Ip[0];
Chris@82 58 T1o = Im[WS(rs, 9)];
Chris@82 59 T1p = T1n - T1o;
Chris@82 60 T3x = T1n + T1o;
Chris@82 61 }
Chris@82 62 {
Chris@82 63 E T4, T5, T1q, T1r;
Chris@82 64 T4 = Rp[WS(rs, 5)];
Chris@82 65 T5 = Rm[WS(rs, 4)];
Chris@82 66 T6 = T4 + T5;
Chris@82 67 T3y = T4 - T5;
Chris@82 68 T1q = Ip[WS(rs, 5)];
Chris@82 69 T1r = Im[WS(rs, 4)];
Chris@82 70 T1s = T1q - T1r;
Chris@82 71 T2V = T1q + T1r;
Chris@82 72 }
Chris@82 73 T7 = T3 + T6;
Chris@82 74 T4e = T2U - T2V;
Chris@82 75 T4z = T3y + T3x;
Chris@82 76 TE = T3 - T6;
Chris@82 77 T1t = T1p - T1s;
Chris@82 78 T2W = T2U + T2V;
Chris@82 79 T3z = T3x - T3y;
Chris@82 80 T2l = T1p + T1s;
Chris@82 81 }
/* Load the remaining 32 inputs in four groups, then combine the groups by further sums and differences. */
Chris@82 82 {
Chris@82 83 E Te, T4f, T4p, TF, T1a, T2Z, T3o, T2b, TA, T4j, T4t, TJ, T12, T39, T3k;
Chris@82 84 E T2f, Tl, T4g, T4q, TG, T1h, T32, T3r, T2c, Tt, T4i, T4s, TI, TV, T36;
Chris@82 85 E T3h, T2e;
Chris@82 86 {
Chris@82 87 E Ta, T2X, T16, T3m, Td, T3n, T19, T2Y;
Chris@82 88 {
Chris@82 89 E T8, T9, T14, T15;
Chris@82 90 T8 = Rp[WS(rs, 4)];
Chris@82 91 T9 = Rm[WS(rs, 5)];
Chris@82 92 Ta = T8 + T9;
Chris@82 93 T2X = T8 - T9;
Chris@82 94 T14 = Ip[WS(rs, 4)];
Chris@82 95 T15 = Im[WS(rs, 5)];
Chris@82 96 T16 = T14 - T15;
Chris@82 97 T3m = T14 + T15;
Chris@82 98 }
Chris@82 99 {
Chris@82 100 E Tb, Tc, T17, T18;
Chris@82 101 Tb = Rp[WS(rs, 9)];
Chris@82 102 Tc = Rm[0];
Chris@82 103 Td = Tb + Tc;
Chris@82 104 T3n = Tb - Tc;
Chris@82 105 T17 = Ip[WS(rs, 9)];
Chris@82 106 T18 = Im[0];
Chris@82 107 T19 = T17 - T18;
Chris@82 108 T2Y = T17 + T18;
Chris@82 109 }
Chris@82 110 Te = Ta + Td;
Chris@82 111 T4f = T2X - T2Y;
Chris@82 112 T4p = T3n + T3m;
Chris@82 113 TF = Ta - Td;
Chris@82 114 T1a = T16 - T19;
Chris@82 115 T2Z = T2X + T2Y;
Chris@82 116 T3o = T3m - T3n;
Chris@82 117 T2b = T16 + T19;
Chris@82 118 }
Chris@82 119 {
Chris@82 120 E Tw, T37, TY, T3j, Tz, T3i, T11, T38;
Chris@82 121 {
Chris@82 122 E Tu, Tv, TW, TX;
Chris@82 123 Tu = Rm[WS(rs, 7)];
Chris@82 124 Tv = Rp[WS(rs, 2)];
Chris@82 125 Tw = Tu + Tv;
Chris@82 126 T37 = Tu - Tv;
Chris@82 127 TW = Ip[WS(rs, 2)];
Chris@82 128 TX = Im[WS(rs, 7)];
Chris@82 129 TY = TW - TX;
Chris@82 130 T3j = TW + TX;
Chris@82 131 }
Chris@82 132 {
Chris@82 133 E Tx, Ty, TZ, T10;
Chris@82 134 Tx = Rm[WS(rs, 2)];
Chris@82 135 Ty = Rp[WS(rs, 7)];
Chris@82 136 Tz = Tx + Ty;
Chris@82 137 T3i = Tx - Ty;
Chris@82 138 TZ = Ip[WS(rs, 7)];
Chris@82 139 T10 = Im[WS(rs, 2)];
Chris@82 140 T11 = TZ - T10;
Chris@82 141 T38 = TZ + T10;
Chris@82 142 }
Chris@82 143 TA = Tw + Tz;
Chris@82 144 T4j = T37 + T38;
Chris@82 145 T4t = T3i - T3j;
Chris@82 146 TJ = Tw - Tz;
Chris@82 147 T12 = TY - T11;
Chris@82 148 T39 = T37 - T38;
Chris@82 149 T3k = T3i + T3j;
Chris@82 150 T2f = TY + T11;
Chris@82 151 }
Chris@82 152 {
Chris@82 153 E Th, T30, T1d, T3q, Tk, T3p, T1g, T31;
Chris@82 154 {
Chris@82 155 E Tf, Tg, T1b, T1c;
Chris@82 156 Tf = Rm[WS(rs, 3)];
Chris@82 157 Tg = Rp[WS(rs, 6)];
Chris@82 158 Th = Tf + Tg;
Chris@82 159 T30 = Tf - Tg;
Chris@82 160 T1b = Ip[WS(rs, 6)];
Chris@82 161 T1c = Im[WS(rs, 3)];
Chris@82 162 T1d = T1b - T1c;
Chris@82 163 T3q = T1b + T1c;
Chris@82 164 }
Chris@82 165 {
Chris@82 166 E Ti, Tj, T1e, T1f;
Chris@82 167 Ti = Rp[WS(rs, 1)];
Chris@82 168 Tj = Rm[WS(rs, 8)];
Chris@82 169 Tk = Ti + Tj;
Chris@82 170 T3p = Ti - Tj;
Chris@82 171 T1e = Ip[WS(rs, 1)];
Chris@82 172 T1f = Im[WS(rs, 8)];
Chris@82 173 T1g = T1e - T1f;
Chris@82 174 T31 = T1e + T1f;
Chris@82 175 }
Chris@82 176 Tl = Th + Tk;
Chris@82 177 T4g = T30 - T31;
Chris@82 178 T4q = T3p - T3q;
Chris@82 179 TG = Th - Tk;
Chris@82 180 T1h = T1d - T1g;
Chris@82 181 T32 = T30 + T31;
Chris@82 182 T3r = T3p + T3q;
Chris@82 183 T2c = T1d + T1g;
Chris@82 184 }
Chris@82 185 {
Chris@82 186 E Tp, T34, TR, T3f, Ts, T3g, TU, T35;
Chris@82 187 {
Chris@82 188 E Tn, To, TP, TQ;
Chris@82 189 Tn = Rp[WS(rs, 8)];
Chris@82 190 To = Rm[WS(rs, 1)];
Chris@82 191 Tp = Tn + To;
Chris@82 192 T34 = Tn - To;
Chris@82 193 TP = Ip[WS(rs, 8)];
Chris@82 194 TQ = Im[WS(rs, 1)];
Chris@82 195 TR = TP - TQ;
Chris@82 196 T3f = TP + TQ;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E Tq, Tr, TS, TT;
Chris@82 200 Tq = Rm[WS(rs, 6)];
Chris@82 201 Tr = Rp[WS(rs, 3)];
Chris@82 202 Ts = Tq + Tr;
Chris@82 203 T3g = Tq - Tr;
Chris@82 204 TS = Ip[WS(rs, 3)];
Chris@82 205 TT = Im[WS(rs, 6)];
Chris@82 206 TU = TS - TT;
Chris@82 207 T35 = TS + TT;
Chris@82 208 }
Chris@82 209 Tt = Tp + Ts;
Chris@82 210 T4i = T34 + T35;
Chris@82 211 T4s = T3g + T3f;
Chris@82 212 TI = Tp - Ts;
Chris@82 213 TV = TR - TU;
Chris@82 214 T36 = T34 - T35;
Chris@82 215 T3h = T3f - T3g;
Chris@82 216 T2e = TR + TU;
Chris@82 217 }
Chris@82 218 T13 = TV - T12;
Chris@82 219 T3G = T36 - T39;
Chris@82 220 T3H = T2Z - T32;
Chris@82 221 T1i = T1a - T1h;
Chris@82 222 T2g = T2e - T2f;
Chris@82 223 T4H = T4i - T4j;
Chris@82 224 T4G = T4f - T4g;
Chris@82 225 T2d = T2b - T2c;
Chris@82 226 T1B = TF - TG;
Chris@82 227 T4u = T4s - T4t;
Chris@82 228 T4r = T4p - T4q;
Chris@82 229 T1A = TI - TJ;
Chris@82 230 T2s = Te - Tl;
Chris@82 231 T3l = T3h + T3k;
Chris@82 232 T2t = Tt - TA;
Chris@82 233 T3s = T3o + T3r;
Chris@82 234 T2m = T2b + T2c;
Chris@82 235 T2n = T2e + T2f;
Chris@82 236 T2o = T2m + T2n;
Chris@82 237 T1u = T1a + T1h;
Chris@82 238 T1v = TV + T12;
Chris@82 239 T1w = T1u + T1v;
Chris@82 240 {
Chris@82 241 E Tm, TB, TH, TK;
Chris@82 242 Tm = Te + Tl;
Chris@82 243 TB = Tt + TA;
Chris@82 244 TC = Tm + TB;
Chris@82 245 T29 = Tm - TB;
Chris@82 246 {
Chris@82 247 E T3A, T3B, T4h, T4k;
Chris@82 248 T3A = T3o - T3r;
Chris@82 249 T3B = T3h - T3k;
Chris@82 250 T3C = T3A + T3B;
Chris@82 251 T3E = T3A - T3B;
Chris@82 252 T4h = T4f + T4g;
Chris@82 253 T4k = T4i + T4j;
Chris@82 254 T4l = T4h + T4k;
Chris@82 255 T4n = T4h - T4k;
Chris@82 256 }
Chris@82 257 TH = TF + TG;
Chris@82 258 TK = TI + TJ;
Chris@82 259 TL = TH + TK;
Chris@82 260 TN = TH - TK;
Chris@82 261 {
Chris@82 262 E T33, T3a, T4A, T4B;
Chris@82 263 T33 = T2Z + T32;
Chris@82 264 T3a = T36 + T39;
Chris@82 265 T3b = T33 + T3a;
Chris@82 266 T3d = T33 - T3a;
Chris@82 267 T4A = T4p + T4q;
Chris@82 268 T4B = T4s + T4t;
Chris@82 269 T4C = T4A + T4B;
Chris@82 270 T4E = T4A - T4B;
Chris@82 271 }
Chris@82 272 }
Chris@82 273 }
/* Rp[0] and Rm[0] are the only outputs stored without a twiddle factor. */
Chris@82 274 Rp[0] = T7 + TC;
Chris@82 275 Rm[0] = T2l + T2o;
/*
 * Every remaining block stores one output pair (Rp[k]/Rm[k] or Ip[k]/Im[k])
 * after combining it with one twiddle pair W[2j], W[2j+1] (j = 0..18).
 */
Chris@82 276 {
Chris@82 277 E T25, T21, T23, T24, T26, T22;
Chris@82 278 T25 = T1t + T1w;
Chris@82 279 T22 = TE + TL;
Chris@82 280 T21 = W[18];
Chris@82 281 T23 = T21 * T22;
Chris@82 282 T24 = W[19];
Chris@82 283 T26 = T24 * T22;
Chris@82 284 Rp[WS(rs, 5)] = FNMS(T24, T25, T23);
Chris@82 285 Rm[WS(rs, 5)] = FMA(T21, T25, T26);
Chris@82 286 }
Chris@82 287 {
Chris@82 288 E T58, T5b, T59, T5c, T57, T5a;
Chris@82 289 T58 = T4e + T4l;
Chris@82 290 T5b = T4z + T4C;
Chris@82 291 T57 = W[8];
Chris@82 292 T59 = T57 * T58;
Chris@82 293 T5c = T57 * T5b;
Chris@82 294 T5a = W[9];
Chris@82 295 Ip[WS(rs, 2)] = FNMS(T5a, T5b, T59);
Chris@82 296 Im[WS(rs, 2)] = FMA(T5a, T58, T5c);
Chris@82 297 }
Chris@82 298 {
Chris@82 299 E T48, T4b, T49, T4c, T47, T4a;
Chris@82 300 T48 = T2W + T3b;
Chris@82 301 T4b = T3z + T3C;
Chris@82 302 T47 = W[28];
Chris@82 303 T49 = T47 * T48;
Chris@82 304 T4c = T47 * T4b;
Chris@82 305 T4a = W[29];
Chris@82 306 Ip[WS(rs, 7)] = FNMS(T4a, T4b, T49);
Chris@82 307 Im[WS(rs, 7)] = FMA(T4a, T48, T4c);
Chris@82 308 }
/* Outputs Ip/Im at positions 1, 9, 3 and 5. */
Chris@82 309 {
Chris@82 310 E T3u, T42, T3M, T3U, T3J, T45, T3P, T3Z;
Chris@82 311 {
Chris@82 312 E T3t, T3T, T3e, T3S, T3c;
Chris@82 313 T3t = FNMS(KP618033988, T3s, T3l);
Chris@82 314 T3T = FMA(KP618033988, T3l, T3s);
Chris@82 315 T3c = FNMS(KP250000000, T3b, T2W);
Chris@82 316 T3e = FNMS(KP559016994, T3d, T3c);
Chris@82 317 T3S = FMA(KP559016994, T3d, T3c);
Chris@82 318 T3u = FNMS(KP951056516, T3t, T3e);
Chris@82 319 T42 = FMA(KP951056516, T3T, T3S);
Chris@82 320 T3M = FMA(KP951056516, T3t, T3e);
Chris@82 321 T3U = FNMS(KP951056516, T3T, T3S);
Chris@82 322 }
Chris@82 323 {
Chris@82 324 E T3I, T3Y, T3F, T3X, T3D;
Chris@82 325 T3I = FNMS(KP618033988, T3H, T3G);
Chris@82 326 T3Y = FMA(KP618033988, T3G, T3H);
Chris@82 327 T3D = FNMS(KP250000000, T3C, T3z);
Chris@82 328 T3F = FNMS(KP559016994, T3E, T3D);
Chris@82 329 T3X = FMA(KP559016994, T3E, T3D);
Chris@82 330 T3J = FMA(KP951056516, T3I, T3F);
Chris@82 331 T45 = FNMS(KP951056516, T3Y, T3X);
Chris@82 332 T3P = FNMS(KP951056516, T3I, T3F);
Chris@82 333 T3Z = FMA(KP951056516, T3Y, T3X);
Chris@82 334 }
Chris@82 335 {
Chris@82 336 E T3v, T3K, T2T, T3w;
Chris@82 337 T2T = W[4];
Chris@82 338 T3v = T2T * T3u;
Chris@82 339 T3K = T2T * T3J;
Chris@82 340 T3w = W[5];
Chris@82 341 Ip[WS(rs, 1)] = FNMS(T3w, T3J, T3v);
Chris@82 342 Im[WS(rs, 1)] = FMA(T3w, T3u, T3K);
Chris@82 343 }
Chris@82 344 {
Chris@82 345 E T43, T46, T41, T44;
Chris@82 346 T41 = W[36];
Chris@82 347 T43 = T41 * T42;
Chris@82 348 T46 = T41 * T45;
Chris@82 349 T44 = W[37];
Chris@82 350 Ip[WS(rs, 9)] = FNMS(T44, T45, T43);
Chris@82 351 Im[WS(rs, 9)] = FMA(T44, T42, T46);
Chris@82 352 }
Chris@82 353 {
Chris@82 354 E T3N, T3Q, T3L, T3O;
Chris@82 355 T3L = W[12];
Chris@82 356 T3N = T3L * T3M;
Chris@82 357 T3Q = T3L * T3P;
Chris@82 358 T3O = W[13];
Chris@82 359 Ip[WS(rs, 3)] = FNMS(T3O, T3P, T3N);
Chris@82 360 Im[WS(rs, 3)] = FMA(T3O, T3M, T3Q);
Chris@82 361 }
Chris@82 362 {
Chris@82 363 E T3V, T40, T3R, T3W;
Chris@82 364 T3R = W[20];
Chris@82 365 T3V = T3R * T3U;
Chris@82 366 T40 = T3R * T3Z;
Chris@82 367 T3W = W[21];
Chris@82 368 Ip[WS(rs, 5)] = FNMS(T3W, T3Z, T3V);
Chris@82 369 Im[WS(rs, 5)] = FMA(T3W, T3U, T40);
Chris@82 370 }
Chris@82 371 }
/* Outputs Ip/Im at positions 0, 8, 4 and 6. */
Chris@82 372 {
Chris@82 373 E T4w, T52, T4M, T4U, T4J, T55, T4P, T4Z;
Chris@82 374 {
Chris@82 375 E T4v, T4T, T4o, T4S, T4m;
Chris@82 376 T4v = FMA(KP618033988, T4u, T4r);
Chris@82 377 T4T = FNMS(KP618033988, T4r, T4u);
Chris@82 378 T4m = FNMS(KP250000000, T4l, T4e);
Chris@82 379 T4o = FMA(KP559016994, T4n, T4m);
Chris@82 380 T4S = FNMS(KP559016994, T4n, T4m);
Chris@82 381 T4w = FNMS(KP951056516, T4v, T4o);
Chris@82 382 T52 = FMA(KP951056516, T4T, T4S);
Chris@82 383 T4M = FMA(KP951056516, T4v, T4o);
Chris@82 384 T4U = FNMS(KP951056516, T4T, T4S);
Chris@82 385 }
Chris@82 386 {
Chris@82 387 E T4I, T4Y, T4F, T4X, T4D;
Chris@82 388 T4I = FMA(KP618033988, T4H, T4G);
Chris@82 389 T4Y = FNMS(KP618033988, T4G, T4H);
Chris@82 390 T4D = FNMS(KP250000000, T4C, T4z);
Chris@82 391 T4F = FMA(KP559016994, T4E, T4D);
Chris@82 392 T4X = FNMS(KP559016994, T4E, T4D);
Chris@82 393 T4J = FMA(KP951056516, T4I, T4F);
Chris@82 394 T55 = FNMS(KP951056516, T4Y, T4X);
Chris@82 395 T4P = FNMS(KP951056516, T4I, T4F);
Chris@82 396 T4Z = FMA(KP951056516, T4Y, T4X);
Chris@82 397 }
Chris@82 398 {
Chris@82 399 E T4x, T4K, T4d, T4y;
Chris@82 400 T4d = W[0];
Chris@82 401 T4x = T4d * T4w;
Chris@82 402 T4K = T4d * T4J;
Chris@82 403 T4y = W[1];
Chris@82 404 Ip[0] = FNMS(T4y, T4J, T4x);
Chris@82 405 Im[0] = FMA(T4y, T4w, T4K);
Chris@82 406 }
Chris@82 407 {
Chris@82 408 E T53, T56, T51, T54;
Chris@82 409 T51 = W[32];
Chris@82 410 T53 = T51 * T52;
Chris@82 411 T56 = T51 * T55;
Chris@82 412 T54 = W[33];
Chris@82 413 Ip[WS(rs, 8)] = FNMS(T54, T55, T53);
Chris@82 414 Im[WS(rs, 8)] = FMA(T54, T52, T56);
Chris@82 415 }
Chris@82 416 {
Chris@82 417 E T4N, T4Q, T4L, T4O;
Chris@82 418 T4L = W[16];
Chris@82 419 T4N = T4L * T4M;
Chris@82 420 T4Q = T4L * T4P;
Chris@82 421 T4O = W[17];
Chris@82 422 Ip[WS(rs, 4)] = FNMS(T4O, T4P, T4N);
Chris@82 423 Im[WS(rs, 4)] = FMA(T4O, T4M, T4Q);
Chris@82 424 }
Chris@82 425 {
Chris@82 426 E T4V, T50, T4R, T4W;
Chris@82 427 T4R = W[24];
Chris@82 428 T4V = T4R * T4U;
Chris@82 429 T50 = T4R * T4Z;
Chris@82 430 T4W = W[25];
Chris@82 431 Ip[WS(rs, 6)] = FNMS(T4W, T4Z, T4V);
Chris@82 432 Im[WS(rs, 6)] = FMA(T4W, T4U, T50);
Chris@82 433 }
Chris@82 434 }
/* Outputs Rp/Rm at positions 2, 6, 8 and 4. */
Chris@82 435 {
Chris@82 436 E T2u, T2K, T2r, T2J, T2i, T2O, T2y, T2G, T2p, T2q;
Chris@82 437 T2u = FMA(KP618033988, T2t, T2s);
Chris@82 438 T2K = FNMS(KP618033988, T2s, T2t);
Chris@82 439 T2p = FNMS(KP250000000, T2o, T2l);
Chris@82 440 T2q = T2m - T2n;
Chris@82 441 T2r = FMA(KP559016994, T2q, T2p);
Chris@82 442 T2J = FNMS(KP559016994, T2q, T2p);
Chris@82 443 {
Chris@82 444 E T2h, T2F, T2a, T2E, T28;
Chris@82 445 T2h = FMA(KP618033988, T2g, T2d);
Chris@82 446 T2F = FNMS(KP618033988, T2d, T2g);
Chris@82 447 T28 = FNMS(KP250000000, TC, T7);
Chris@82 448 T2a = FMA(KP559016994, T29, T28);
Chris@82 449 T2E = FNMS(KP559016994, T29, T28);
Chris@82 450 T2i = FMA(KP951056516, T2h, T2a);
Chris@82 451 T2O = FMA(KP951056516, T2F, T2E);
Chris@82 452 T2y = FNMS(KP951056516, T2h, T2a);
Chris@82 453 T2G = FNMS(KP951056516, T2F, T2E);
Chris@82 454 }
Chris@82 455 {
Chris@82 456 E T2v, T2k, T2w, T27, T2j;
Chris@82 457 T2v = FNMS(KP951056516, T2u, T2r);
Chris@82 458 T2k = W[7];
Chris@82 459 T2w = T2k * T2i;
Chris@82 460 T27 = W[6];
Chris@82 461 T2j = T27 * T2i;
Chris@82 462 Rp[WS(rs, 2)] = FNMS(T2k, T2v, T2j);
Chris@82 463 Rm[WS(rs, 2)] = FMA(T27, T2v, T2w);
Chris@82 464 }
Chris@82 465 {
Chris@82 466 E T2R, T2Q, T2S, T2N, T2P;
Chris@82 467 T2R = FNMS(KP951056516, T2K, T2J);
Chris@82 468 T2Q = W[23];
Chris@82 469 T2S = T2Q * T2O;
Chris@82 470 T2N = W[22];
Chris@82 471 T2P = T2N * T2O;
Chris@82 472 Rp[WS(rs, 6)] = FNMS(T2Q, T2R, T2P);
Chris@82 473 Rm[WS(rs, 6)] = FMA(T2N, T2R, T2S);
Chris@82 474 }
Chris@82 475 {
Chris@82 476 E T2B, T2A, T2C, T2x, T2z;
Chris@82 477 T2B = FMA(KP951056516, T2u, T2r);
Chris@82 478 T2A = W[31];
Chris@82 479 T2C = T2A * T2y;
Chris@82 480 T2x = W[30];
Chris@82 481 T2z = T2x * T2y;
Chris@82 482 Rp[WS(rs, 8)] = FNMS(T2A, T2B, T2z);
Chris@82 483 Rm[WS(rs, 8)] = FMA(T2x, T2B, T2C);
Chris@82 484 }
Chris@82 485 {
Chris@82 486 E T2L, T2I, T2M, T2D, T2H;
Chris@82 487 T2L = FMA(KP951056516, T2K, T2J);
Chris@82 488 T2I = W[15];
Chris@82 489 T2M = T2I * T2G;
Chris@82 490 T2D = W[14];
Chris@82 491 T2H = T2D * T2G;
Chris@82 492 Rp[WS(rs, 4)] = FNMS(T2I, T2L, T2H);
Chris@82 493 Rm[WS(rs, 4)] = FMA(T2D, T2L, T2M);
Chris@82 494 }
Chris@82 495 }
/* Outputs Rp/Rm at positions 1, 7, 9 and 3. */
Chris@82 496 {
Chris@82 497 E T1C, T1S, T1z, T1R, T1k, T1W, T1G, T1O, T1x, T1y;
Chris@82 498 T1C = FNMS(KP618033988, T1B, T1A);
Chris@82 499 T1S = FMA(KP618033988, T1A, T1B);
Chris@82 500 T1x = FNMS(KP250000000, T1w, T1t);
Chris@82 501 T1y = T1u - T1v;
Chris@82 502 T1z = FNMS(KP559016994, T1y, T1x);
Chris@82 503 T1R = FMA(KP559016994, T1y, T1x);
Chris@82 504 {
Chris@82 505 E T1j, T1N, TO, T1M, TM;
Chris@82 506 T1j = FNMS(KP618033988, T1i, T13);
Chris@82 507 T1N = FMA(KP618033988, T13, T1i);
Chris@82 508 TM = FNMS(KP250000000, TL, TE);
Chris@82 509 TO = FNMS(KP559016994, TN, TM);
Chris@82 510 T1M = FMA(KP559016994, TN, TM);
Chris@82 511 T1k = FMA(KP951056516, T1j, TO);
Chris@82 512 T1W = FMA(KP951056516, T1N, T1M);
Chris@82 513 T1G = FNMS(KP951056516, T1j, TO);
Chris@82 514 T1O = FNMS(KP951056516, T1N, T1M);
Chris@82 515 }
Chris@82 516 {
Chris@82 517 E T1D, T1m, T1E, TD, T1l;
Chris@82 518 T1D = FNMS(KP951056516, T1C, T1z);
Chris@82 519 T1m = W[3];
Chris@82 520 T1E = T1m * T1k;
Chris@82 521 TD = W[2];
Chris@82 522 T1l = TD * T1k;
Chris@82 523 Rp[WS(rs, 1)] = FNMS(T1m, T1D, T1l);
Chris@82 524 Rm[WS(rs, 1)] = FMA(TD, T1D, T1E);
Chris@82 525 }
Chris@82 526 {
Chris@82 527 E T1Z, T1Y, T20, T1V, T1X;
Chris@82 528 T1Z = FNMS(KP951056516, T1S, T1R);
Chris@82 529 T1Y = W[27];
Chris@82 530 T20 = T1Y * T1W;
Chris@82 531 T1V = W[26];
Chris@82 532 T1X = T1V * T1W;
Chris@82 533 Rp[WS(rs, 7)] = FNMS(T1Y, T1Z, T1X);
Chris@82 534 Rm[WS(rs, 7)] = FMA(T1V, T1Z, T20);
Chris@82 535 }
Chris@82 536 {
Chris@82 537 E T1J, T1I, T1K, T1F, T1H;
Chris@82 538 T1J = FMA(KP951056516, T1C, T1z);
Chris@82 539 T1I = W[35];
Chris@82 540 T1K = T1I * T1G;
Chris@82 541 T1F = W[34];
Chris@82 542 T1H = T1F * T1G;
Chris@82 543 Rp[WS(rs, 9)] = FNMS(T1I, T1J, T1H);
Chris@82 544 Rm[WS(rs, 9)] = FMA(T1F, T1J, T1K);
Chris@82 545 }
Chris@82 546 {
Chris@82 547 E T1T, T1Q, T1U, T1L, T1P;
Chris@82 548 T1T = FMA(KP951056516, T1S, T1R);
Chris@82 549 T1Q = W[11];
Chris@82 550 T1U = T1Q * T1O;
Chris@82 551 T1L = W[10];
Chris@82 552 T1P = T1L * T1O;
Chris@82 553 Rp[WS(rs, 3)] = FNMS(T1Q, T1T, T1P);
Chris@82 554 Rm[WS(rs, 3)] = FMA(T1L, T1T, T1U);
Chris@82 555 }
Chris@82 556 }
Chris@82 557 }
Chris@82 558 }
Chris@82 559 }
Chris@82 560
/*
 * Twiddle instruction table referenced by desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared elsewhere;
 * presumably this requests the full twiddle set for size 20, which would
 * match the 38 values of W that hc2cb_20 reads per iteration -- confirm.
 */
Chris@82 561 static const tw_instr twinstr[] = {
Chris@82 562 {TW_FULL, 1, 20},
Chris@82 563 {TW_NEXT, 1, 0}
Chris@82 564 };
Chris@82 565
/*
 * Descriptor handed to the planner for the FMA variant: size 20, the codelet
 * name, its twiddle table and GENUS.  The counts {136, 38, 110, 0} match the
 * "136 additions, 38 multiplications, 110 fused multiply/add" figures in the
 * generator comment for this variant.
 * NOTE(review): hc2c_desc field meanings come from a header not shown here.
 */
Chris@82 566 static const hc2c_desc desc = { 20, "hc2cb_20", twinstr, &GENUS, {136, 38, 110, 0} };
Chris@82 567
/*
 * Registration entry point: passes hc2cb_20 and its descriptor to
 * X(khc2c_register) for planner p, tagged HC2C_VIA_RDFT.
 */
Chris@82 568 void X(codelet_hc2cb_20) (planner *p) {
Chris@82 569 X(khc2c_register) (p, hc2cb_20, &desc, HC2C_VIA_RDFT);
Chris@82 570 }
Chris@82 571 #else
Chris@82 572
Chris@82 573 /* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -dif -name hc2cb_20 -include rdft/scalar/hc2cb.h */
Chris@82 574
Chris@82 575 /*
Chris@82 576 * This function contains 246 FP additions, 124 FP multiplications,
Chris@82 577 * (or, 184 additions, 62 multiplications, 62 fused multiply/add),
Chris@82 578 * 97 stack variables, 4 constants, and 80 memory accesses
Chris@82 579 */
Chris@82 580 #include "rdft/scalar/hc2cb.h"
Chris@82 581
/*
 * hc2cb_20 -- non-FMA variant (this branch is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated butterfly; the generator command line preceding this
 * function gives "-n 20 -dif -sign 1" (without -fma).
 *
 * Rp, Ip, Rm, Im: data arrays, each read at WS(rs, k) for k = 0..9 and then
 *                 overwritten at the same 10 positions (all data loads of an
 *                 iteration precede its first store).
 * W:              twiddle table; W[0]..W[37] are read per iteration.  W is
 *                 first offset by (mb - 1) * 38 and then advanced by 38 per
 *                 iteration.
 * rs:             stride handed to WS() for element indexing.
 * mb, me:         the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms:             per-iteration step: Rp and Ip move forward by ms, Rm and Im
 *                 move backward by ms.
 *
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * defined outside this file.  The comments below assume FMA(a, b, c) is
 * a * b + c and FNMS(a, b, c) is c - a * b -- confirm against the headers.
 */
Chris@82 582 static void hc2cb_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 583 {
/* Numerically: 1/4, sqrt(5)/4, sin(pi/5) and sin(2*pi/5). */
Chris@82 584 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 585 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 586 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 587 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 588 {
Chris@82 589 INT m;
/* One butterfly per value of m; the pointer and twiddle updates are in the loop header. */
Chris@82 590 for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(80, rs)) {
Chris@82 591 E T7, T3T, T49, TE, T1v, T2T, T3g, T2d, T13, T3n, T3o, T1i, T26, T4e, T4d;
Chris@82 592 E T23, T1n, T42, T3Z, T1m, T2h, T2I, T2i, T2P, T30, T37, T38, Tm, TB, TC;
Chris@82 593 E T46, T47, T4a, T2a, T2b, T2e, T1w, T1x, T1y, T3O, T3R, T3U, T3h, T3i, T3j;
Chris@82 594 E TH, TK, TL;
/* Load Rp[0]/Rm[9], Rp[5]/Rm[4] and the matching Ip/Im entries; form their sums and differences. */
Chris@82 595 {
Chris@82 596 E T3, T2R, T1r, T3e, T6, T3f, T1u, T2S;
Chris@82 597 {
Chris@82 598 E T1, T2, T1p, T1q;
Chris@82 599 T1 = Rp[0];
Chris@82 600 T2 = Rm[WS(rs, 9)];
Chris@82 601 T3 = T1 + T2;
Chris@82 602 T2R = T1 - T2;
Chris@82 603 T1p = Ip[0];
Chris@82 604 T1q = Im[WS(rs, 9)];
Chris@82 605 T1r = T1p - T1q;
Chris@82 606 T3e = T1p + T1q;
Chris@82 607 }
Chris@82 608 {
Chris@82 609 E T4, T5, T1s, T1t;
Chris@82 610 T4 = Rp[WS(rs, 5)];
Chris@82 611 T5 = Rm[WS(rs, 4)];
Chris@82 612 T6 = T4 + T5;
Chris@82 613 T3f = T4 - T5;
Chris@82 614 T1s = Ip[WS(rs, 5)];
Chris@82 615 T1t = Im[WS(rs, 4)];
Chris@82 616 T1u = T1s - T1t;
Chris@82 617 T2S = T1s + T1t;
Chris@82 618 }
Chris@82 619 T7 = T3 + T6;
Chris@82 620 T3T = T2R - T2S;
Chris@82 621 T49 = T3f + T3e;
Chris@82 622 TE = T3 - T6;
Chris@82 623 T1v = T1r - T1u;
Chris@82 624 T2T = T2R + T2S;
Chris@82 625 T3g = T3e - T3f;
Chris@82 626 T2d = T1r + T1u;
Chris@82 627 }
/* Load the remaining 32 inputs in four groups, then combine the groups by further sums and differences. */
Chris@82 628 {
Chris@82 629 E Te, T3M, T3X, TF, TV, T2E, T2W, T21, TA, T3Q, T41, TJ, T1h, T2O, T36;
Chris@82 630 E T25, Tl, T3N, T3Y, TG, T12, T2H, T2Z, T22, Tt, T3P, T40, TI, T1a, T2L;
Chris@82 631 E T33, T24;
Chris@82 632 {
Chris@82 633 E Ta, T2U, TR, T2C, Td, T2D, TU, T2V;
Chris@82 634 {
Chris@82 635 E T8, T9, TP, TQ;
Chris@82 636 T8 = Rp[WS(rs, 4)];
Chris@82 637 T9 = Rm[WS(rs, 5)];
Chris@82 638 Ta = T8 + T9;
Chris@82 639 T2U = T8 - T9;
Chris@82 640 TP = Ip[WS(rs, 4)];
Chris@82 641 TQ = Im[WS(rs, 5)];
Chris@82 642 TR = TP - TQ;
Chris@82 643 T2C = TP + TQ;
Chris@82 644 }
Chris@82 645 {
Chris@82 646 E Tb, Tc, TS, TT;
Chris@82 647 Tb = Rp[WS(rs, 9)];
Chris@82 648 Tc = Rm[0];
Chris@82 649 Td = Tb + Tc;
Chris@82 650 T2D = Tb - Tc;
Chris@82 651 TS = Ip[WS(rs, 9)];
Chris@82 652 TT = Im[0];
Chris@82 653 TU = TS - TT;
Chris@82 654 T2V = TS + TT;
Chris@82 655 }
Chris@82 656 Te = Ta + Td;
Chris@82 657 T3M = T2U - T2V;
Chris@82 658 T3X = T2D + T2C;
Chris@82 659 TF = Ta - Td;
Chris@82 660 TV = TR - TU;
Chris@82 661 T2E = T2C - T2D;
Chris@82 662 T2W = T2U + T2V;
Chris@82 663 T21 = TR + TU;
Chris@82 664 }
Chris@82 665 {
Chris@82 666 E Tw, T34, T1d, T2N, Tz, T2M, T1g, T35;
Chris@82 667 {
Chris@82 668 E Tu, Tv, T1b, T1c;
Chris@82 669 Tu = Rm[WS(rs, 7)];
Chris@82 670 Tv = Rp[WS(rs, 2)];
Chris@82 671 Tw = Tu + Tv;
Chris@82 672 T34 = Tu - Tv;
Chris@82 673 T1b = Ip[WS(rs, 2)];
Chris@82 674 T1c = Im[WS(rs, 7)];
Chris@82 675 T1d = T1b - T1c;
Chris@82 676 T2N = T1b + T1c;
Chris@82 677 }
Chris@82 678 {
Chris@82 679 E Tx, Ty, T1e, T1f;
Chris@82 680 Tx = Rm[WS(rs, 2)];
Chris@82 681 Ty = Rp[WS(rs, 7)];
Chris@82 682 Tz = Tx + Ty;
Chris@82 683 T2M = Tx - Ty;
Chris@82 684 T1e = Ip[WS(rs, 7)];
Chris@82 685 T1f = Im[WS(rs, 2)];
Chris@82 686 T1g = T1e - T1f;
Chris@82 687 T35 = T1e + T1f;
Chris@82 688 }
Chris@82 689 TA = Tw + Tz;
Chris@82 690 T3Q = T34 + T35;
Chris@82 691 T41 = T2M - T2N;
Chris@82 692 TJ = Tw - Tz;
Chris@82 693 T1h = T1d - T1g;
Chris@82 694 T2O = T2M + T2N;
Chris@82 695 T36 = T34 - T35;
Chris@82 696 T25 = T1d + T1g;
Chris@82 697 }
Chris@82 698 {
Chris@82 699 E Th, T2X, TY, T2G, Tk, T2F, T11, T2Y;
Chris@82 700 {
Chris@82 701 E Tf, Tg, TW, TX;
Chris@82 702 Tf = Rm[WS(rs, 3)];
Chris@82 703 Tg = Rp[WS(rs, 6)];
Chris@82 704 Th = Tf + Tg;
Chris@82 705 T2X = Tf - Tg;
Chris@82 706 TW = Ip[WS(rs, 6)];
Chris@82 707 TX = Im[WS(rs, 3)];
Chris@82 708 TY = TW - TX;
Chris@82 709 T2G = TW + TX;
Chris@82 710 }
Chris@82 711 {
Chris@82 712 E Ti, Tj, TZ, T10;
Chris@82 713 Ti = Rp[WS(rs, 1)];
Chris@82 714 Tj = Rm[WS(rs, 8)];
Chris@82 715 Tk = Ti + Tj;
Chris@82 716 T2F = Ti - Tj;
Chris@82 717 TZ = Ip[WS(rs, 1)];
Chris@82 718 T10 = Im[WS(rs, 8)];
Chris@82 719 T11 = TZ - T10;
Chris@82 720 T2Y = TZ + T10;
Chris@82 721 }
Chris@82 722 Tl = Th + Tk;
Chris@82 723 T3N = T2X - T2Y;
Chris@82 724 T3Y = T2F - T2G;
Chris@82 725 TG = Th - Tk;
Chris@82 726 T12 = TY - T11;
Chris@82 727 T2H = T2F + T2G;
Chris@82 728 T2Z = T2X + T2Y;
Chris@82 729 T22 = TY + T11;
Chris@82 730 }
Chris@82 731 {
Chris@82 732 E Tp, T31, T16, T2J, Ts, T2K, T19, T32;
Chris@82 733 {
Chris@82 734 E Tn, To, T14, T15;
Chris@82 735 Tn = Rp[WS(rs, 8)];
Chris@82 736 To = Rm[WS(rs, 1)];
Chris@82 737 Tp = Tn + To;
Chris@82 738 T31 = Tn - To;
Chris@82 739 T14 = Ip[WS(rs, 8)];
Chris@82 740 T15 = Im[WS(rs, 1)];
Chris@82 741 T16 = T14 - T15;
Chris@82 742 T2J = T14 + T15;
Chris@82 743 }
Chris@82 744 {
Chris@82 745 E Tq, Tr, T17, T18;
Chris@82 746 Tq = Rm[WS(rs, 6)];
Chris@82 747 Tr = Rp[WS(rs, 3)];
Chris@82 748 Ts = Tq + Tr;
Chris@82 749 T2K = Tq - Tr;
Chris@82 750 T17 = Ip[WS(rs, 3)];
Chris@82 751 T18 = Im[WS(rs, 6)];
Chris@82 752 T19 = T17 - T18;
Chris@82 753 T32 = T17 + T18;
Chris@82 754 }
Chris@82 755 Tt = Tp + Ts;
Chris@82 756 T3P = T31 + T32;
Chris@82 757 T40 = T2K + T2J;
Chris@82 758 TI = Tp - Ts;
Chris@82 759 T1a = T16 - T19;
Chris@82 760 T2L = T2J - T2K;
Chris@82 761 T33 = T31 - T32;
Chris@82 762 T24 = T16 + T19;
Chris@82 763 }
Chris@82 764 T13 = TV - T12;
Chris@82 765 T3n = T2W - T2Z;
Chris@82 766 T3o = T33 - T36;
Chris@82 767 T1i = T1a - T1h;
Chris@82 768 T26 = T24 - T25;
Chris@82 769 T4e = T3P - T3Q;
Chris@82 770 T4d = T3M - T3N;
Chris@82 771 T23 = T21 - T22;
Chris@82 772 T1n = TI - TJ;
Chris@82 773 T42 = T40 - T41;
Chris@82 774 T3Z = T3X - T3Y;
Chris@82 775 T1m = TF - TG;
Chris@82 776 T2h = Te - Tl;
Chris@82 777 T2I = T2E + T2H;
Chris@82 778 T2i = Tt - TA;
Chris@82 779 T2P = T2L + T2O;
Chris@82 780 T30 = T2W + T2Z;
Chris@82 781 T37 = T33 + T36;
Chris@82 782 T38 = T30 + T37;
Chris@82 783 Tm = Te + Tl;
Chris@82 784 TB = Tt + TA;
Chris@82 785 TC = Tm + TB;
Chris@82 786 T46 = T3X + T3Y;
Chris@82 787 T47 = T40 + T41;
Chris@82 788 T4a = T46 + T47;
Chris@82 789 T2a = T21 + T22;
Chris@82 790 T2b = T24 + T25;
Chris@82 791 T2e = T2a + T2b;
Chris@82 792 T1w = TV + T12;
Chris@82 793 T1x = T1a + T1h;
Chris@82 794 T1y = T1w + T1x;
Chris@82 795 T3O = T3M + T3N;
Chris@82 796 T3R = T3P + T3Q;
Chris@82 797 T3U = T3O + T3R;
Chris@82 798 T3h = T2E - T2H;
Chris@82 799 T3i = T2L - T2O;
Chris@82 800 T3j = T3h + T3i;
Chris@82 801 TH = TF + TG;
Chris@82 802 TK = TI + TJ;
Chris@82 803 TL = TH + TK;
Chris@82 804 }
/* Rp[0] and Rm[0] are the only outputs stored without a twiddle factor. */
Chris@82 805 Rp[0] = T7 + TC;
Chris@82 806 Rm[0] = T2d + T2e;
/*
 * Every remaining block stores one output pair (Rp[k]/Rm[k] or Ip[k]/Im[k])
 * after combining it with one twiddle pair W[2j], W[2j+1] (j = 0..18).
 */
Chris@82 807 {
Chris@82 808 E T1U, T1W, T1T, T1V;
Chris@82 809 T1U = TE + TL;
Chris@82 810 T1W = T1v + T1y;
Chris@82 811 T1T = W[18];
Chris@82 812 T1V = W[19];
Chris@82 813 Rp[WS(rs, 5)] = FNMS(T1V, T1W, T1T * T1U);
Chris@82 814 Rm[WS(rs, 5)] = FMA(T1V, T1U, T1T * T1W);
Chris@82 815 }
Chris@82 816 {
Chris@82 817 E T4y, T4A, T4x, T4z;
Chris@82 818 T4y = T3T + T3U;
Chris@82 819 T4A = T49 + T4a;
Chris@82 820 T4x = W[8];
Chris@82 821 T4z = W[9];
Chris@82 822 Ip[WS(rs, 2)] = FNMS(T4z, T4A, T4x * T4y);
Chris@82 823 Im[WS(rs, 2)] = FMA(T4x, T4A, T4z * T4y);
Chris@82 824 }
Chris@82 825 {
Chris@82 826 E T3I, T3K, T3H, T3J;
Chris@82 827 T3I = T2T + T38;
Chris@82 828 T3K = T3g + T3j;
Chris@82 829 T3H = W[28];
Chris@82 830 T3J = W[29];
Chris@82 831 Ip[WS(rs, 7)] = FNMS(T3J, T3K, T3H * T3I);
Chris@82 832 Im[WS(rs, 7)] = FMA(T3H, T3K, T3J * T3I);
Chris@82 833 }
/* Outputs Rp/Rm at positions 2, 6, 8 and 4. */
Chris@82 834 {
Chris@82 835 E T27, T2j, T2v, T2r, T2g, T2u, T20, T2q;
Chris@82 836 T27 = FMA(KP951056516, T23, KP587785252 * T26);
Chris@82 837 T2j = FMA(KP951056516, T2h, KP587785252 * T2i);
Chris@82 838 T2v = FNMS(KP951056516, T2i, KP587785252 * T2h);
Chris@82 839 T2r = FNMS(KP951056516, T26, KP587785252 * T23);
Chris@82 840 {
Chris@82 841 E T2c, T2f, T1Y, T1Z;
Chris@82 842 T2c = KP559016994 * (T2a - T2b);
Chris@82 843 T2f = FNMS(KP250000000, T2e, T2d);
Chris@82 844 T2g = T2c + T2f;
Chris@82 845 T2u = T2f - T2c;
Chris@82 846 T1Y = KP559016994 * (Tm - TB);
Chris@82 847 T1Z = FNMS(KP250000000, TC, T7);
Chris@82 848 T20 = T1Y + T1Z;
Chris@82 849 T2q = T1Z - T1Y;
Chris@82 850 }
Chris@82 851 {
Chris@82 852 E T28, T2k, T1X, T29;
Chris@82 853 T28 = T20 + T27;
Chris@82 854 T2k = T2g - T2j;
Chris@82 855 T1X = W[6];
Chris@82 856 T29 = W[7];
Chris@82 857 Rp[WS(rs, 2)] = FNMS(T29, T2k, T1X * T28);
Chris@82 858 Rm[WS(rs, 2)] = FMA(T29, T28, T1X * T2k);
Chris@82 859 }
Chris@82 860 {
Chris@82 861 E T2y, T2A, T2x, T2z;
Chris@82 862 T2y = T2q - T2r;
Chris@82 863 T2A = T2v + T2u;
Chris@82 864 T2x = W[22];
Chris@82 865 T2z = W[23];
Chris@82 866 Rp[WS(rs, 6)] = FNMS(T2z, T2A, T2x * T2y);
Chris@82 867 Rm[WS(rs, 6)] = FMA(T2z, T2y, T2x * T2A);
Chris@82 868 }
Chris@82 869 {
Chris@82 870 E T2m, T2o, T2l, T2n;
Chris@82 871 T2m = T20 - T27;
Chris@82 872 T2o = T2j + T2g;
Chris@82 873 T2l = W[30];
Chris@82 874 T2n = W[31];
Chris@82 875 Rp[WS(rs, 8)] = FNMS(T2n, T2o, T2l * T2m);
Chris@82 876 Rm[WS(rs, 8)] = FMA(T2n, T2m, T2l * T2o);
Chris@82 877 }
Chris@82 878 {
Chris@82 879 E T2s, T2w, T2p, T2t;
Chris@82 880 T2s = T2q + T2r;
Chris@82 881 T2w = T2u - T2v;
Chris@82 882 T2p = W[14];
Chris@82 883 T2t = W[15];
Chris@82 884 Rp[WS(rs, 4)] = FNMS(T2t, T2w, T2p * T2s);
Chris@82 885 Rm[WS(rs, 4)] = FMA(T2t, T2s, T2p * T2w);
Chris@82 886 }
Chris@82 887 }
/* Outputs Ip/Im at positions 0, 8, 4 and 6. */
Chris@82 888 {
Chris@82 889 E T43, T4f, T4r, T4m, T4c, T4q, T3W, T4n;
Chris@82 890 T43 = FMA(KP951056516, T3Z, KP587785252 * T42);
Chris@82 891 T4f = FMA(KP951056516, T4d, KP587785252 * T4e);
Chris@82 892 T4r = FNMS(KP951056516, T4e, KP587785252 * T4d);
Chris@82 893 T4m = FNMS(KP951056516, T42, KP587785252 * T3Z);
Chris@82 894 {
Chris@82 895 E T48, T4b, T3S, T3V;
Chris@82 896 T48 = KP559016994 * (T46 - T47);
Chris@82 897 T4b = FNMS(KP250000000, T4a, T49);
Chris@82 898 T4c = T48 + T4b;
Chris@82 899 T4q = T4b - T48;
Chris@82 900 T3S = KP559016994 * (T3O - T3R);
Chris@82 901 T3V = FNMS(KP250000000, T3U, T3T);
Chris@82 902 T3W = T3S + T3V;
Chris@82 903 T4n = T3V - T3S;
Chris@82 904 }
Chris@82 905 {
Chris@82 906 E T44, T4g, T3L, T45;
Chris@82 907 T44 = T3W - T43;
Chris@82 908 T4g = T4c + T4f;
Chris@82 909 T3L = W[0];
Chris@82 910 T45 = W[1];
Chris@82 911 Ip[0] = FNMS(T45, T4g, T3L * T44);
Chris@82 912 Im[0] = FMA(T3L, T4g, T45 * T44);
Chris@82 913 }
Chris@82 914 {
Chris@82 915 E T4u, T4w, T4t, T4v;
Chris@82 916 T4u = T4n - T4m;
Chris@82 917 T4w = T4q + T4r;
Chris@82 918 T4t = W[32];
Chris@82 919 T4v = W[33];
Chris@82 920 Ip[WS(rs, 8)] = FNMS(T4v, T4w, T4t * T4u);
Chris@82 921 Im[WS(rs, 8)] = FMA(T4t, T4w, T4v * T4u);
Chris@82 922 }
Chris@82 923 {
Chris@82 924 E T4i, T4k, T4h, T4j;
Chris@82 925 T4i = T43 + T3W;
Chris@82 926 T4k = T4c - T4f;
Chris@82 927 T4h = W[16];
Chris@82 928 T4j = W[17];
Chris@82 929 Ip[WS(rs, 4)] = FNMS(T4j, T4k, T4h * T4i);
Chris@82 930 Im[WS(rs, 4)] = FMA(T4h, T4k, T4j * T4i);
Chris@82 931 }
Chris@82 932 {
Chris@82 933 E T4o, T4s, T4l, T4p;
Chris@82 934 T4o = T4m + T4n;
Chris@82 935 T4s = T4q - T4r;
Chris@82 936 T4l = W[24];
Chris@82 937 T4p = W[25];
Chris@82 938 Ip[WS(rs, 6)] = FNMS(T4p, T4s, T4l * T4o);
Chris@82 939 Im[WS(rs, 6)] = FMA(T4l, T4s, T4p * T4o);
Chris@82 940 }
Chris@82 941 }
/* Outputs Rp/Rm at positions 1, 7, 9 and 3. */
Chris@82 942 {
Chris@82 943 E T1j, T1o, T1M, T1J, T1B, T1N, TO, T1I;
Chris@82 944 T1j = FNMS(KP951056516, T1i, KP587785252 * T13);
Chris@82 945 T1o = FNMS(KP951056516, T1n, KP587785252 * T1m);
Chris@82 946 T1M = FMA(KP951056516, T1m, KP587785252 * T1n);
Chris@82 947 T1J = FMA(KP951056516, T13, KP587785252 * T1i);
Chris@82 948 {
Chris@82 949 E T1z, T1A, TM, TN;
Chris@82 950 T1z = FNMS(KP250000000, T1y, T1v);
Chris@82 951 T1A = KP559016994 * (T1w - T1x);
Chris@82 952 T1B = T1z - T1A;
Chris@82 953 T1N = T1A + T1z;
Chris@82 954 TM = FNMS(KP250000000, TL, TE);
Chris@82 955 TN = KP559016994 * (TH - TK);
Chris@82 956 TO = TM - TN;
Chris@82 957 T1I = TN + TM;
Chris@82 958 }
Chris@82 959 {
Chris@82 960 E T1k, T1C, TD, T1l;
Chris@82 961 T1k = TO - T1j;
Chris@82 962 T1C = T1o + T1B;
Chris@82 963 TD = W[2];
Chris@82 964 T1l = W[3];
Chris@82 965 Rp[WS(rs, 1)] = FNMS(T1l, T1C, TD * T1k);
Chris@82 966 Rm[WS(rs, 1)] = FMA(T1l, T1k, TD * T1C);
Chris@82 967 }
Chris@82 968 {
Chris@82 969 E T1Q, T1S, T1P, T1R;
Chris@82 970 T1Q = T1I + T1J;
Chris@82 971 T1S = T1N - T1M;
Chris@82 972 T1P = W[26];
Chris@82 973 T1R = W[27];
Chris@82 974 Rp[WS(rs, 7)] = FNMS(T1R, T1S, T1P * T1Q);
Chris@82 975 Rm[WS(rs, 7)] = FMA(T1R, T1Q, T1P * T1S);
Chris@82 976 }
Chris@82 977 {
Chris@82 978 E T1E, T1G, T1D, T1F;
Chris@82 979 T1E = TO + T1j;
Chris@82 980 T1G = T1B - T1o;
Chris@82 981 T1D = W[34];
Chris@82 982 T1F = W[35];
Chris@82 983 Rp[WS(rs, 9)] = FNMS(T1F, T1G, T1D * T1E);
Chris@82 984 Rm[WS(rs, 9)] = FMA(T1F, T1E, T1D * T1G);
Chris@82 985 }
Chris@82 986 {
Chris@82 987 E T1K, T1O, T1H, T1L;
Chris@82 988 T1K = T1I - T1J;
Chris@82 989 T1O = T1M + T1N;
Chris@82 990 T1H = W[10];
Chris@82 991 T1L = W[11];
Chris@82 992 Rp[WS(rs, 3)] = FNMS(T1L, T1O, T1H * T1K);
Chris@82 993 Rm[WS(rs, 3)] = FMA(T1L, T1K, T1H * T1O);
Chris@82 994 }
Chris@82 995 }
/* Outputs Ip/Im at positions 1, 9, 3 and 5. */
Chris@82 996 {
Chris@82 997 E T2Q, T3p, T3B, T3x, T3m, T3A, T3b, T3w;
Chris@82 998 T2Q = FNMS(KP951056516, T2P, KP587785252 * T2I);
Chris@82 999 T3p = FNMS(KP951056516, T3o, KP587785252 * T3n);
Chris@82 1000 T3B = FMA(KP951056516, T3n, KP587785252 * T3o);
Chris@82 1001 T3x = FMA(KP951056516, T2I, KP587785252 * T2P);
Chris@82 1002 {
Chris@82 1003 E T3k, T3l, T39, T3a;
Chris@82 1004 T3k = FNMS(KP250000000, T3j, T3g);
Chris@82 1005 T3l = KP559016994 * (T3h - T3i);
Chris@82 1006 T3m = T3k - T3l;
Chris@82 1007 T3A = T3l + T3k;
Chris@82 1008 T39 = FNMS(KP250000000, T38, T2T);
Chris@82 1009 T3a = KP559016994 * (T30 - T37);
Chris@82 1010 T3b = T39 - T3a;
Chris@82 1011 T3w = T3a + T39;
Chris@82 1012 }
Chris@82 1013 {
Chris@82 1014 E T3c, T3q, T2B, T3d;
Chris@82 1015 T3c = T2Q + T3b;
Chris@82 1016 T3q = T3m - T3p;
Chris@82 1017 T2B = W[4];
Chris@82 1018 T3d = W[5];
Chris@82 1019 Ip[WS(rs, 1)] = FNMS(T3d, T3q, T2B * T3c);
Chris@82 1020 Im[WS(rs, 1)] = FMA(T2B, T3q, T3d * T3c);
Chris@82 1021 }
Chris@82 1022 {
Chris@82 1023 E T3E, T3G, T3D, T3F;
Chris@82 1024 T3E = T3x + T3w;
Chris@82 1025 T3G = T3A - T3B;
Chris@82 1026 T3D = W[36];
Chris@82 1027 T3F = W[37];
Chris@82 1028 Ip[WS(rs, 9)] = FNMS(T3F, T3G, T3D * T3E);
Chris@82 1029 Im[WS(rs, 9)] = FMA(T3D, T3G, T3F * T3E);
Chris@82 1030 }
Chris@82 1031 {
Chris@82 1032 E T3s, T3u, T3r, T3t;
Chris@82 1033 T3s = T3b - T2Q;
Chris@82 1034 T3u = T3m + T3p;
Chris@82 1035 T3r = W[12];
Chris@82 1036 T3t = W[13];
Chris@82 1037 Ip[WS(rs, 3)] = FNMS(T3t, T3u, T3r * T3s);
Chris@82 1038 Im[WS(rs, 3)] = FMA(T3r, T3u, T3t * T3s);
Chris@82 1039 }
Chris@82 1040 {
Chris@82 1041 E T3y, T3C, T3v, T3z;
Chris@82 1042 T3y = T3w - T3x;
Chris@82 1043 T3C = T3A + T3B;
Chris@82 1044 T3v = W[20];
Chris@82 1045 T3z = W[21];
Chris@82 1046 Ip[WS(rs, 5)] = FNMS(T3z, T3C, T3v * T3y);
Chris@82 1047 Im[WS(rs, 5)] = FMA(T3v, T3C, T3z * T3y);
Chris@82 1048 }
Chris@82 1049 }
Chris@82 1050 }
Chris@82 1051 }
Chris@82 1052 }
Chris@82 1053
/*
 * Twiddle instruction table referenced by desc below (same contents as in
 * the FMA branch of this file).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared elsewhere;
 * presumably this requests the full twiddle set for size 20, which would
 * match the 38 values of W that hc2cb_20 reads per iteration -- confirm.
 */
Chris@82 1054 static const tw_instr twinstr[] = {
Chris@82 1055 {TW_FULL, 1, 20},
Chris@82 1056 {TW_NEXT, 1, 0}
Chris@82 1057 };
Chris@82 1058
/*
 * Descriptor handed to the planner for the non-FMA variant: size 20, the
 * codelet name, its twiddle table and GENUS.  The counts {184, 62, 62, 0}
 * match the "184 additions, 62 multiplications, 62 fused multiply/add"
 * figures in the generator comment for this variant.
 * NOTE(review): hc2c_desc field meanings come from a header not shown here.
 */
Chris@82 1059 static const hc2c_desc desc = { 20, "hc2cb_20", twinstr, &GENUS, {184, 62, 62, 0} };
Chris@82 1060
/*
 * Registration entry point: passes hc2cb_20 and its descriptor to
 * X(khc2c_register) for planner p, tagged HC2C_VIA_RDFT.
 */
Chris@82 1061 void X(codelet_hc2cb_20) (planner *p) {
Chris@82 1062 X(khc2c_register) (p, hc2cb_20, &desc, HC2C_VIA_RDFT);
Chris@82 1063 }
Chris@82 1064 #endif