annotate src/fftw-3.3.8/rdft/scalar/r2cb/hc2cbdft_10.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:07:58 EDT 2018 */
Chris@82 23
Chris@82 24 #include "rdft/codelet-rdft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cbdft_10 -include rdft/scalar/hc2cb.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 122 FP additions, 72 FP multiplications,
Chris@82 32 * (or, 68 additions, 18 multiplications, 54 fused multiply/add),
Chris@82 33 * 91 stack variables, 4 constants, and 40 memory accesses
Chris@82 34 */
Chris@82 35 #include "rdft/scalar/hc2cb.h"
Chris@82 36
/*
 * hc2cbdft_10 (FMA variant): genfft-generated codelet for n = 10
 * (generator flags: -fma -sign 1 -n 10 -dif). This variant is compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as used by the code below:
 *   Rp, Ip - arrays read and written at offsets WS(rs, 0..4); advanced by
 *            +ms after every iteration.
 *   Rm, Im - arrays read and written at offsets WS(rs, 0..4); moved by
 *            -ms after every iteration.
 *   W      - twiddle table; 18 entries (W[0]..W[17]) are consumed per
 *            iteration. The pointer is pre-offset by (mb - 1) * 18.
 *   rs     - stride passed to WS() to index within each array.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - per-iteration pointer step for Rp/Ip/Rm/Im.
 *
 * The transform is in place: each iteration loads 20 values (5 from each of
 * Rp, Rm, Ip, Im) and stores 20 results back to the same slots.
 *
 * Constants (the values are those given in the DK() lines):
 *   KP951056516 ~ sin(2*pi/5)
 *   KP559016994 ~ sqrt(5)/4
 *   KP618033988 ~ (sqrt(5) - 1)/2
 *   KP250000000 = 1/4
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * defined in project headers not visible here; FMA(a, b, c) / FNMS(a, b, c)
 * are presumably a*b + c and c - a*b -- confirm against the codelet headers.
 */
Chris@82 37 static void hc2cbdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 40 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 41 DK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 42 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 43 {
Chris@82 44 INT m;
/* One pass per m in [mb, me): Rp/Ip walk forward, Rm/Im walk backward, W advances by 18. */
Chris@82 45 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
/* Values that stay live across the three stages below. */
Chris@82 46 E T3, Tl, Tu, T14, Ti, T13, Ts, Tt, T1p, T23, TZ, T1z, TQ, T1g, TV;
Chris@82 47 E T1l, TT, TU, T1j, T1k, T1c, T1Y, TK, T1u;
/* Stage 1: load the ten Rp/Rm inputs and reduce them to sums and differences. */
Chris@82 48 {
Chris@82 49 E Td, Tp, Tg, Tq, Th, Tr, T6, Tm, T9, Tn, Ta, To, T1, T2;
Chris@82 50 T1 = Rp[0];
Chris@82 51 T2 = Rm[WS(rs, 4)];
Chris@82 52 T3 = T1 + T2;
Chris@82 53 Tl = T1 - T2;
Chris@82 54 {
Chris@82 55 E Tb, Tc, Te, Tf;
Chris@82 56 Tb = Rp[WS(rs, 4)];
Chris@82 57 Tc = Rm[0];
Chris@82 58 Td = Tb + Tc;
Chris@82 59 Tp = Tb - Tc;
Chris@82 60 Te = Rm[WS(rs, 3)];
Chris@82 61 Tf = Rp[WS(rs, 1)];
Chris@82 62 Tg = Te + Tf;
Chris@82 63 Tq = Te - Tf;
Chris@82 64 }
Chris@82 65 Th = Td + Tg;
Chris@82 66 Tr = Tp + Tq;
Chris@82 67 {
Chris@82 68 E T4, T5, T7, T8;
Chris@82 69 T4 = Rp[WS(rs, 2)];
Chris@82 70 T5 = Rm[WS(rs, 2)];
Chris@82 71 T6 = T4 + T5;
Chris@82 72 Tm = T4 - T5;
Chris@82 73 T7 = Rm[WS(rs, 1)];
Chris@82 74 T8 = Rp[WS(rs, 3)];
Chris@82 75 T9 = T7 + T8;
Chris@82 76 Tn = T7 - T8;
Chris@82 77 }
Chris@82 78 Ta = T6 + T9;
Chris@82 79 To = Tm + Tn;
Chris@82 80 Tu = To - Tr;
Chris@82 81 T14 = Ta - Th;
Chris@82 82 Ti = Ta + Th;
Chris@82 83 T13 = FNMS(KP250000000, Ti, T3);
Chris@82 84 Ts = To + Tr;
Chris@82 85 Tt = FNMS(KP250000000, Ts, Tl);
Chris@82 86 {
Chris@82 87 E T1n, T1o, TX, TY;
Chris@82 88 T1n = Td - Tg;
Chris@82 89 T1o = T6 - T9;
Chris@82 90 T1p = FNMS(KP618033988, T1o, T1n);
Chris@82 91 T23 = FMA(KP618033988, T1n, T1o);
Chris@82 92 TX = Tm - Tn;
Chris@82 93 TY = Tp - Tq;
Chris@82 94 TZ = FMA(KP618033988, TY, TX);
Chris@82 95 T1z = FNMS(KP618033988, TX, TY);
Chris@82 96 }
Chris@82 97 }
/* Stage 2: the same kind of reduction applied to the ten Ip/Im inputs. */
Chris@82 98 {
Chris@82 99 E TF, T16, TI, T17, TS, T1i, Ty, T19, TB, T1a, TR, T1h, TO, TP;
Chris@82 100 TO = Ip[0];
Chris@82 101 TP = Im[WS(rs, 4)];
Chris@82 102 TQ = TO + TP;
Chris@82 103 T1g = TO - TP;
Chris@82 104 {
Chris@82 105 E TD, TE, TG, TH;
Chris@82 106 TD = Ip[WS(rs, 4)];
Chris@82 107 TE = Im[0];
Chris@82 108 TF = TD + TE;
Chris@82 109 T16 = TD - TE;
Chris@82 110 TG = Im[WS(rs, 3)];
Chris@82 111 TH = Ip[WS(rs, 1)];
Chris@82 112 TI = TG + TH;
Chris@82 113 T17 = TH - TG;
Chris@82 114 }
Chris@82 115 TS = TF - TI;
Chris@82 116 T1i = T16 + T17;
Chris@82 117 {
Chris@82 118 E Tw, Tx, Tz, TA;
Chris@82 119 Tw = Ip[WS(rs, 2)];
Chris@82 120 Tx = Im[WS(rs, 2)];
Chris@82 121 Ty = Tw + Tx;
Chris@82 122 T19 = Tw - Tx;
Chris@82 123 Tz = Im[WS(rs, 1)];
Chris@82 124 TA = Ip[WS(rs, 3)];
Chris@82 125 TB = Tz + TA;
Chris@82 126 T1a = TA - Tz;
Chris@82 127 }
Chris@82 128 TR = Ty - TB;
Chris@82 129 T1h = T19 + T1a;
Chris@82 130 TV = TR - TS;
Chris@82 131 T1l = T1h - T1i;
Chris@82 132 TT = TR + TS;
Chris@82 133 TU = FNMS(KP250000000, TT, TQ);
Chris@82 134 T1j = T1h + T1i;
Chris@82 135 T1k = FNMS(KP250000000, T1j, T1g);
Chris@82 136 {
Chris@82 137 E T18, T1b, TC, TJ;
Chris@82 138 T18 = T16 - T17;
Chris@82 139 T1b = T19 - T1a;
Chris@82 140 T1c = FNMS(KP618033988, T1b, T18);
Chris@82 141 T1Y = FMA(KP618033988, T18, T1b);
Chris@82 142 TC = Ty + TB;
Chris@82 143 TJ = TF + TI;
Chris@82 144 TK = FMA(KP618033988, TJ, TC);
Chris@82 145 T1u = FNMS(KP618033988, TC, TJ);
Chris@82 146 }
Chris@82 147 }
/* Stage 3: finish the combination, scale by the twiddles W[0..17], and store in place. */
Chris@82 148 {
Chris@82 149 E Tj, T2y, T2a, T1A, T2q, T10, T1Q, T24, T2k, T1q, T1K, T26, T28, T29, T2c;
Chris@82 150 E Tk, TM, TN, T2w, T1M, T1O, T1P, T1S, T1s, T1w, T1x, T1C, T2m, T2o, T2p;
Chris@82 151 E T2s, T12, T1e, T1f, T1E, T1G, T1I, T1J, T1U, T1W, T20, T21, T2e, T2g, T2i;
Chris@82 152 E T2j, T2u, T1y, TW, T22, T2l, T2r;
Chris@82 153 Tj = T3 + Ti;
Chris@82 154 T2y = T1g + T1j;
Chris@82 155 T2a = TQ + TT;
Chris@82 156 T1y = FNMS(KP559016994, TV, TU);
Chris@82 157 T1A = FMA(KP951056516, T1z, T1y);
Chris@82 158 T2q = FNMS(KP951056516, T1z, T1y);
Chris@82 159 TW = FMA(KP559016994, TV, TU);
Chris@82 160 T10 = FMA(KP951056516, TZ, TW);
Chris@82 161 T1Q = FNMS(KP951056516, TZ, TW);
Chris@82 162 T22 = FMA(KP559016994, T1l, T1k);
Chris@82 163 T24 = FNMS(KP951056516, T23, T22);
Chris@82 164 T2k = FMA(KP951056516, T23, T22);
Chris@82 165 {
Chris@82 166 E T1m, T1v, T2n, T1t;
Chris@82 167 T1m = FNMS(KP559016994, T1l, T1k);
Chris@82 168 T1q = FNMS(KP951056516, T1p, T1m);
Chris@82 169 T1K = FMA(KP951056516, T1p, T1m);
/* Each twiddle W[k] is loaded once and its first product is formed here; the
 * matching FMA/FNMS that completes the product happens just before the stores. */
Chris@82 170 {
Chris@82 171 E T27, TL, T1N, Tv;
Chris@82 172 T27 = Tl + Ts;
Chris@82 173 T26 = W[9];
Chris@82 174 T28 = T26 * T27;
Chris@82 175 T29 = W[8];
Chris@82 176 T2c = T29 * T27;
Chris@82 177 Tv = FMA(KP559016994, Tu, Tt);
Chris@82 178 TL = FNMS(KP951056516, TK, Tv);
Chris@82 179 T1N = FMA(KP951056516, TK, Tv);
Chris@82 180 Tk = W[1];
Chris@82 181 TM = Tk * TL;
Chris@82 182 TN = W[0];
Chris@82 183 T2w = TN * TL;
Chris@82 184 T1M = W[17];
Chris@82 185 T1O = T1M * T1N;
Chris@82 186 T1P = W[16];
Chris@82 187 T1S = T1P * T1N;
Chris@82 188 }
Chris@82 189 T1t = FNMS(KP559016994, Tu, Tt);
Chris@82 190 T1v = FNMS(KP951056516, T1u, T1t);
Chris@82 191 T2n = FMA(KP951056516, T1u, T1t);
Chris@82 192 T1s = W[5];
Chris@82 193 T1w = T1s * T1v;
Chris@82 194 T1x = W[4];
Chris@82 195 T1C = T1x * T1v;
Chris@82 196 T2m = W[13];
Chris@82 197 T2o = T2m * T2n;
Chris@82 198 T2p = W[12];
Chris@82 199 T2s = T2p * T2n;
Chris@82 200 {
Chris@82 201 E T1d, T1H, T15, T1Z, T2h, T1X;
Chris@82 202 T15 = FNMS(KP559016994, T14, T13);
Chris@82 203 T1d = FMA(KP951056516, T1c, T15);
Chris@82 204 T1H = FNMS(KP951056516, T1c, T15);
Chris@82 205 T12 = W[2];
Chris@82 206 T1e = T12 * T1d;
Chris@82 207 T1f = W[3];
Chris@82 208 T1E = T1f * T1d;
Chris@82 209 T1G = W[14];
Chris@82 210 T1I = T1G * T1H;
Chris@82 211 T1J = W[15];
Chris@82 212 T1U = T1J * T1H;
Chris@82 213 T1X = FMA(KP559016994, T14, T13);
Chris@82 214 T1Z = FMA(KP951056516, T1Y, T1X);
Chris@82 215 T2h = FNMS(KP951056516, T1Y, T1X);
Chris@82 216 T1W = W[6];
Chris@82 217 T20 = T1W * T1Z;
Chris@82 218 T21 = W[7];
Chris@82 219 T2e = T21 * T1Z;
Chris@82 220 T2g = W[10];
Chris@82 221 T2i = T2g * T2h;
Chris@82 222 T2j = W[11];
Chris@82 223 T2u = T2j * T2h;
Chris@82 224 }
Chris@82 225 }
/* Stores for offsets 0, 1 and 4 (Rp/Rm pairs take difference/sum, likewise Im/Ip). */
Chris@82 226 {
Chris@82 227 E T11, T2x, T1r, T1B;
Chris@82 228 T11 = FMA(TN, T10, TM);
Chris@82 229 Rp[0] = Tj - T11;
Chris@82 230 Rm[0] = Tj + T11;
Chris@82 231 T2x = FNMS(Tk, T10, T2w);
Chris@82 232 Im[0] = T2x - T2y;
Chris@82 233 Ip[0] = T2x + T2y;
Chris@82 234 T1r = FNMS(T1f, T1q, T1e);
Chris@82 235 T1B = FMA(T1x, T1A, T1w);
Chris@82 236 Rp[WS(rs, 1)] = T1r - T1B;
Chris@82 237 Rm[WS(rs, 1)] = T1B + T1r;
Chris@82 238 {
Chris@82 239 E T1D, T1F, T1L, T1R;
Chris@82 240 T1D = FNMS(T1s, T1A, T1C);
Chris@82 241 T1F = FMA(T12, T1q, T1E);
Chris@82 242 Im[WS(rs, 1)] = T1D - T1F;
Chris@82 243 Ip[WS(rs, 1)] = T1D + T1F;
Chris@82 244 T1L = FNMS(T1J, T1K, T1I);
Chris@82 245 T1R = FMA(T1P, T1Q, T1O);
Chris@82 246 Rp[WS(rs, 4)] = T1L - T1R;
Chris@82 247 Rm[WS(rs, 4)] = T1R + T1L;
Chris@82 248 }
Chris@82 249 }
/* Remaining stores: Im/Ip at offset 4, then offsets 3 and 2. */
Chris@82 250 {
Chris@82 251 E T1T, T1V, T2t, T2v;
Chris@82 252 T1T = FNMS(T1M, T1Q, T1S);
Chris@82 253 T1V = FMA(T1G, T1K, T1U);
Chris@82 254 Im[WS(rs, 4)] = T1T - T1V;
Chris@82 255 Ip[WS(rs, 4)] = T1T + T1V;
Chris@82 256 T2t = FNMS(T2m, T2q, T2s);
Chris@82 257 T2v = FMA(T2g, T2k, T2u);
Chris@82 258 Im[WS(rs, 3)] = T2t - T2v;
Chris@82 259 Ip[WS(rs, 3)] = T2t + T2v;
Chris@82 260 }
Chris@82 261 T2l = FNMS(T2j, T2k, T2i);
Chris@82 262 T2r = FMA(T2p, T2q, T2o);
Chris@82 263 Rp[WS(rs, 3)] = T2l - T2r;
Chris@82 264 Rm[WS(rs, 3)] = T2r + T2l;
Chris@82 265 {
Chris@82 266 E T25, T2b, T2d, T2f;
Chris@82 267 T25 = FNMS(T21, T24, T20);
Chris@82 268 T2b = FMA(T29, T2a, T28);
Chris@82 269 Rp[WS(rs, 2)] = T25 - T2b;
Chris@82 270 Rm[WS(rs, 2)] = T2b + T25;
Chris@82 271 T2d = FNMS(T26, T2a, T2c);
Chris@82 272 T2f = FMA(T1W, T24, T2e);
Chris@82 273 Im[WS(rs, 2)] = T2d - T2f;
Chris@82 274 Ip[WS(rs, 2)] = T2d + T2f;
Chris@82 275 }
Chris@82 276 }
Chris@82 277 }
Chris@82 278 }
Chris@82 279 }
Chris@82 280
/*
 * Twiddle-instruction table referenced by the codelet descriptor below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined in project headers
 * not visible here. Presumably {TW_FULL, 1, 10} requests the full twiddle set
 * for n = 10 (the kernel consumes 18 W entries per iteration) and TW_NEXT ends
 * the list -- confirm against the twiddle definitions.
 */
Chris@82 281 static const tw_instr twinstr[] = {
Chris@82 282 {TW_FULL, 1, 10},
Chris@82 283 {TW_NEXT, 1, 0}
Chris@82 284 };
Chris@82 285
/*
 * Codelet descriptor for the FMA variant: size 10, name string, twiddle table,
 * &GENUS, and an operation-count tuple. {68, 18, 54, 0} matches the generator's
 * header comment for this variant (68 additions, 18 multiplications, 54 fused
 * multiply/adds); the meaning of the fourth entry is not visible here.
 */
Chris@82 286 static const hc2c_desc desc = { 10, "hc2cbdft_10", twinstr, &GENUS, {68, 18, 54, 0} };
Chris@82 287
/*
 * Registration entry point (FMA build): hands the kernel function and its
 * descriptor to X(khc2c_register) for planner p, tagged HC2C_VIA_DFT.
 * NOTE(review): X() is presumably the project's name-mangling macro -- confirm.
 */
Chris@82 288 void X(codelet_hc2cbdft_10) (planner *p) {
Chris@82 289 X(khc2c_register) (p, hc2cbdft_10, &desc, HC2C_VIA_DFT);
Chris@82 290 }
Chris@82 291 #else
Chris@82 292
Chris@82 293 /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 10 -dif -name hc2cbdft_10 -include rdft/scalar/hc2cb.h */
Chris@82 294
Chris@82 295 /*
Chris@82 296 * This function contains 122 FP additions, 60 FP multiplications,
Chris@82 297 * (or, 92 additions, 30 multiplications, 30 fused multiply/add),
Chris@82 298 * 61 stack variables, 4 constants, and 40 memory accesses
Chris@82 299 */
Chris@82 300 #include "rdft/scalar/hc2cb.h"
Chris@82 301
/*
 * hc2cbdft_10 (non-FMA variant): genfft-generated codelet for n = 10
 * (generator flags: -sign 1 -n 10 -dif, without -fma). This variant is
 * compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is
 * defined.
 *
 * Parameters, as used by the code below:
 *   Rp, Ip - arrays read and written at offsets WS(rs, 0..4); advanced by
 *            +ms after every iteration.
 *   Rm, Im - arrays read and written at offsets WS(rs, 0..4); moved by
 *            -ms after every iteration.
 *   W      - twiddle table; 18 entries (W[0]..W[17]) are consumed per
 *            iteration. The pointer is pre-offset by (mb - 1) * 18.
 *   rs     - stride passed to WS() to index within each array.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - per-iteration pointer step for Rp/Ip/Rm/Im.
 *
 * The transform is in place: each iteration loads 20 values (5 from each of
 * Rp, Rm, Ip, Im) and stores 20 results back to the same slots.
 *
 * Constants (the values are those given in the DK() lines):
 *   KP951056516 ~ sin(2*pi/5)
 *   KP587785252 ~ sin(pi/5)
 *   KP250000000 = 1/4
 *   KP559016994 ~ sqrt(5)/4
 * NOTE(review): DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * defined in project headers not visible here; FMA(a, b, c) / FNMS(a, b, c)
 * are presumably a*b + c and c - a*b -- confirm against the codelet headers.
 */
Chris@82 302 static void hc2cbdft_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 303 {
Chris@82 304 DK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 305 DK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 306 DK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 307 DK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 308 {
Chris@82 309 INT m;
/* One pass per m in [mb, me): Rp/Ip walk forward, Rm/Im walk backward, W advances by 18. */
Chris@82 310 for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 18, MAKE_VOLATILE_STRIDE(40, rs)) {
/* Values that stay live across the three stages below. */
Chris@82 311 E T3, TS, TR, T13, Ti, T12, TT, TU, T1g, T1T, Tr, T1s, TJ, T1h, TG;
Chris@82 312 E T1m, TK, TL, T1k, T1l, T1b, T1P, TY, T1w;
/* Stage 1: load the ten Rp/Rm inputs and reduce them to sums and differences. */
Chris@82 313 {
Chris@82 314 E Td, To, Tg, Tp, Th, TQ, T6, Tl, T9, Tm, Ta, TP, T1, T2;
Chris@82 315 T1 = Rp[0];
Chris@82 316 T2 = Rm[WS(rs, 4)];
Chris@82 317 T3 = T1 + T2;
Chris@82 318 TS = T1 - T2;
Chris@82 319 {
Chris@82 320 E Tb, Tc, Te, Tf;
Chris@82 321 Tb = Rp[WS(rs, 4)];
Chris@82 322 Tc = Rm[0];
Chris@82 323 Td = Tb + Tc;
Chris@82 324 To = Tb - Tc;
Chris@82 325 Te = Rm[WS(rs, 3)];
Chris@82 326 Tf = Rp[WS(rs, 1)];
Chris@82 327 Tg = Te + Tf;
Chris@82 328 Tp = Te - Tf;
Chris@82 329 }
Chris@82 330 Th = Td + Tg;
Chris@82 331 TQ = To + Tp;
Chris@82 332 {
Chris@82 333 E T4, T5, T7, T8;
Chris@82 334 T4 = Rp[WS(rs, 2)];
Chris@82 335 T5 = Rm[WS(rs, 2)];
Chris@82 336 T6 = T4 + T5;
Chris@82 337 Tl = T4 - T5;
Chris@82 338 T7 = Rm[WS(rs, 1)];
Chris@82 339 T8 = Rp[WS(rs, 3)];
Chris@82 340 T9 = T7 + T8;
Chris@82 341 Tm = T7 - T8;
Chris@82 342 }
Chris@82 343 Ta = T6 + T9;
Chris@82 344 TP = Tl + Tm;
Chris@82 345 TR = KP559016994 * (TP - TQ);
Chris@82 346 T13 = KP559016994 * (Ta - Th);
Chris@82 347 Ti = Ta + Th;
Chris@82 348 T12 = FNMS(KP250000000, Ti, T3);
Chris@82 349 TT = TP + TQ;
Chris@82 350 TU = FNMS(KP250000000, TT, TS);
Chris@82 351 {
Chris@82 352 E T1e, T1f, Tn, Tq;
Chris@82 353 T1e = T6 - T9;
Chris@82 354 T1f = Td - Tg;
Chris@82 355 T1g = FNMS(KP951056516, T1f, KP587785252 * T1e);
Chris@82 356 T1T = FMA(KP951056516, T1e, KP587785252 * T1f);
Chris@82 357 Tn = Tl - Tm;
Chris@82 358 Tq = To - Tp;
Chris@82 359 Tr = FMA(KP951056516, Tn, KP587785252 * Tq);
Chris@82 360 T1s = FNMS(KP951056516, Tq, KP587785252 * Tn);
Chris@82 361 }
Chris@82 362 }
/* Stage 2: the same kind of reduction applied to the ten Ip/Im inputs. */
Chris@82 363 {
Chris@82 364 E TB, T18, TE, T19, TF, T1j, Tu, T15, Tx, T16, Ty, T1i, TH, TI;
Chris@82 365 TH = Ip[0];
Chris@82 366 TI = Im[WS(rs, 4)];
Chris@82 367 TJ = TH + TI;
Chris@82 368 T1h = TH - TI;
Chris@82 369 {
Chris@82 370 E Tz, TA, TC, TD;
Chris@82 371 Tz = Ip[WS(rs, 4)];
Chris@82 372 TA = Im[0];
Chris@82 373 TB = Tz + TA;
Chris@82 374 T18 = Tz - TA;
Chris@82 375 TC = Im[WS(rs, 3)];
Chris@82 376 TD = Ip[WS(rs, 1)];
Chris@82 377 TE = TC + TD;
Chris@82 378 T19 = TD - TC;
Chris@82 379 }
Chris@82 380 TF = TB - TE;
Chris@82 381 T1j = T18 + T19;
Chris@82 382 {
Chris@82 383 E Ts, Tt, Tv, Tw;
Chris@82 384 Ts = Ip[WS(rs, 2)];
Chris@82 385 Tt = Im[WS(rs, 2)];
Chris@82 386 Tu = Ts + Tt;
Chris@82 387 T15 = Ts - Tt;
Chris@82 388 Tv = Im[WS(rs, 1)];
Chris@82 389 Tw = Ip[WS(rs, 3)];
Chris@82 390 Tx = Tv + Tw;
Chris@82 391 T16 = Tw - Tv;
Chris@82 392 }
Chris@82 393 Ty = Tu - Tx;
Chris@82 394 T1i = T15 + T16;
Chris@82 395 TG = KP559016994 * (Ty - TF);
Chris@82 396 T1m = KP559016994 * (T1i - T1j);
Chris@82 397 TK = Ty + TF;
Chris@82 398 TL = FNMS(KP250000000, TK, TJ);
Chris@82 399 T1k = T1i + T1j;
Chris@82 400 T1l = FNMS(KP250000000, T1k, T1h);
Chris@82 401 {
Chris@82 402 E T17, T1a, TW, TX;
Chris@82 403 T17 = T15 - T16;
Chris@82 404 T1a = T18 - T19;
Chris@82 405 T1b = FNMS(KP951056516, T1a, KP587785252 * T17);
Chris@82 406 T1P = FMA(KP951056516, T17, KP587785252 * T1a);
Chris@82 407 TW = Tu + Tx;
Chris@82 408 TX = TB + TE;
Chris@82 409 TY = FMA(KP951056516, TW, KP587785252 * TX);
Chris@82 410 T1w = FNMS(KP951056516, TX, KP587785252 * TW);
Chris@82 411 }
Chris@82 412 }
/* Stage 3: finish the combination, scale by the twiddles W[0..17], and store in place. */
Chris@82 413 {
Chris@82 414 E Tj, T2g, TN, T1H, T1U, T26, TZ, T1J, T1Q, T24, T1c, T1C, T1t, T29, T1o;
Chris@82 415 E T1E, T1x, T2b, T20, T21, TM, T1S, TV;
Chris@82 416 Tj = T3 + Ti;
Chris@82 417 T2g = T1h + T1k;
Chris@82 418 TM = TG + TL;
Chris@82 419 TN = Tr + TM;
Chris@82 420 T1H = TM - Tr;
Chris@82 421 T1S = T1m + T1l;
Chris@82 422 T1U = T1S - T1T;
Chris@82 423 T26 = T1T + T1S;
Chris@82 424 TV = TR + TU;
Chris@82 425 TZ = TV - TY;
Chris@82 426 T1J = TV + TY;
Chris@82 427 {
Chris@82 428 E T1O, T14, T1r, T1n, T1v;
Chris@82 429 T1O = T13 + T12;
Chris@82 430 T1Q = T1O + T1P;
Chris@82 431 T24 = T1O - T1P;
Chris@82 432 T14 = T12 - T13;
Chris@82 433 T1c = T14 - T1b;
Chris@82 434 T1C = T14 + T1b;
Chris@82 435 T1r = TL - TG;
Chris@82 436 T1t = T1r - T1s;
Chris@82 437 T29 = T1s + T1r;
Chris@82 438 T1n = T1l - T1m;
Chris@82 439 T1o = T1g + T1n;
Chris@82 440 T1E = T1n - T1g;
Chris@82 441 T1v = TU - TR;
Chris@82 442 T1x = T1v + T1w;
Chris@82 443 T2b = T1v - T1w;
/* Twiddle pair W[8], W[9]: these products feed the offset-2 stores below. */
Chris@82 444 {
Chris@82 445 E T1X, T1Z, T1W, T1Y;
Chris@82 446 T1X = TS + TT;
Chris@82 447 T1Z = TJ + TK;
Chris@82 448 T1W = W[9];
Chris@82 449 T1Y = W[8];
Chris@82 450 T20 = FMA(T1W, T1X, T1Y * T1Z);
Chris@82 451 T21 = FNMS(T1W, T1Z, T1Y * T1X);
Chris@82 452 }
Chris@82 453 }
/* Offset 0: twiddles W[0], W[1]. */
Chris@82 454 {
Chris@82 455 E T10, T2f, Tk, TO;
Chris@82 456 Tk = W[0];
Chris@82 457 TO = W[1];
Chris@82 458 T10 = FMA(Tk, TN, TO * TZ);
Chris@82 459 T2f = FNMS(TO, TN, Tk * TZ);
Chris@82 460 Rp[0] = Tj - T10;
Chris@82 461 Ip[0] = T2f + T2g;
Chris@82 462 Rm[0] = Tj + T10;
Chris@82 463 Im[0] = T2f - T2g;
Chris@82 464 }
/* Offset 2: twiddles W[6], W[7] combined with the W[8]/W[9] products above. */
Chris@82 465 {
Chris@82 466 E T1V, T22, T1N, T1R;
Chris@82 467 T1N = W[6];
Chris@82 468 T1R = W[7];
Chris@82 469 T1V = FNMS(T1R, T1U, T1N * T1Q);
Chris@82 470 T22 = FMA(T1R, T1Q, T1N * T1U);
Chris@82 471 Rp[WS(rs, 2)] = T1V - T20;
Chris@82 472 Ip[WS(rs, 2)] = T21 + T22;
Chris@82 473 Rm[WS(rs, 2)] = T20 + T1V;
Chris@82 474 Im[WS(rs, 2)] = T21 - T22;
Chris@82 475 }
/* Offset 1: twiddles W[2]..W[5]. */
Chris@82 476 {
Chris@82 477 E T1p, T1A, T1y, T1z;
Chris@82 478 {
Chris@82 479 E T11, T1d, T1q, T1u;
Chris@82 480 T11 = W[2];
Chris@82 481 T1d = W[3];
Chris@82 482 T1p = FNMS(T1d, T1o, T11 * T1c);
Chris@82 483 T1A = FMA(T1d, T1c, T11 * T1o);
Chris@82 484 T1q = W[4];
Chris@82 485 T1u = W[5];
Chris@82 486 T1y = FMA(T1q, T1t, T1u * T1x);
Chris@82 487 T1z = FNMS(T1u, T1t, T1q * T1x);
Chris@82 488 }
Chris@82 489 Rp[WS(rs, 1)] = T1p - T1y;
Chris@82 490 Ip[WS(rs, 1)] = T1z + T1A;
Chris@82 491 Rm[WS(rs, 1)] = T1y + T1p;
Chris@82 492 Im[WS(rs, 1)] = T1z - T1A;
Chris@82 493 }
/* Offset 4: twiddles W[14]..W[17]. */
Chris@82 494 {
Chris@82 495 E T1F, T1M, T1K, T1L;
Chris@82 496 {
Chris@82 497 E T1B, T1D, T1G, T1I;
Chris@82 498 T1B = W[14];
Chris@82 499 T1D = W[15];
Chris@82 500 T1F = FNMS(T1D, T1E, T1B * T1C);
Chris@82 501 T1M = FMA(T1D, T1C, T1B * T1E);
Chris@82 502 T1G = W[16];
Chris@82 503 T1I = W[17];
Chris@82 504 T1K = FMA(T1G, T1H, T1I * T1J);
Chris@82 505 T1L = FNMS(T1I, T1H, T1G * T1J);
Chris@82 506 }
Chris@82 507 Rp[WS(rs, 4)] = T1F - T1K;
Chris@82 508 Ip[WS(rs, 4)] = T1L + T1M;
Chris@82 509 Rm[WS(rs, 4)] = T1K + T1F;
Chris@82 510 Im[WS(rs, 4)] = T1L - T1M;
Chris@82 511 }
/* Offset 3: twiddles W[10]..W[13]. */
Chris@82 512 {
Chris@82 513 E T27, T2e, T2c, T2d;
Chris@82 514 {
Chris@82 515 E T23, T25, T28, T2a;
Chris@82 516 T23 = W[10];
Chris@82 517 T25 = W[11];
Chris@82 518 T27 = FNMS(T25, T26, T23 * T24);
Chris@82 519 T2e = FMA(T25, T24, T23 * T26);
Chris@82 520 T28 = W[12];
Chris@82 521 T2a = W[13];
Chris@82 522 T2c = FMA(T28, T29, T2a * T2b);
Chris@82 523 T2d = FNMS(T2a, T29, T28 * T2b);
Chris@82 524 }
Chris@82 525 Rp[WS(rs, 3)] = T27 - T2c;
Chris@82 526 Ip[WS(rs, 3)] = T2d + T2e;
Chris@82 527 Rm[WS(rs, 3)] = T2c + T27;
Chris@82 528 Im[WS(rs, 3)] = T2d - T2e;
Chris@82 529 }
Chris@82 530 }
Chris@82 531 }
Chris@82 532 }
Chris@82 533 }
Chris@82 534
/*
 * Twiddle-instruction table referenced by the codelet descriptor below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined in project headers
 * not visible here. Presumably {TW_FULL, 1, 10} requests the full twiddle set
 * for n = 10 (the kernel consumes 18 W entries per iteration) and TW_NEXT ends
 * the list -- confirm against the twiddle definitions.
 */
Chris@82 535 static const tw_instr twinstr[] = {
Chris@82 536 {TW_FULL, 1, 10},
Chris@82 537 {TW_NEXT, 1, 0}
Chris@82 538 };
Chris@82 539
/*
 * Codelet descriptor for the non-FMA variant: size 10, name string, twiddle
 * table, &GENUS, and an operation-count tuple. {92, 30, 30, 0} matches the
 * generator's header comment for this variant (92 additions, 30
 * multiplications, 30 fused multiply/adds); the meaning of the fourth entry is
 * not visible here.
 */
Chris@82 540 static const hc2c_desc desc = { 10, "hc2cbdft_10", twinstr, &GENUS, {92, 30, 30, 0} };
Chris@82 541
/*
 * Registration entry point (non-FMA build): hands the kernel function and its
 * descriptor to X(khc2c_register) for planner p, tagged HC2C_VIA_DFT.
 * NOTE(review): X() is presumably the project's name-mangling macro -- confirm.
 */
Chris@82 542 void X(codelet_hc2cbdft_10) (planner *p) {
Chris@82 543 X(khc2c_register) (p, hc2cbdft_10, &desc, HC2C_VIA_DFT);
Chris@82 544 }
Chris@82 545 #endif