annotate src/fftw-3.3.8/dft/scalar/codelets/n1_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:10 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 148 FP additions, 84 FP multiplications,
Chris@82 32 * (or, 64 additions, 0 multiplications, 84 fused multiply/add),
Chris@82 33 * 67 stack variables, 6 constants, and 56 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_14, FMA variant: this definition is selected when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.  The generator command line recorded
 * in this file (gen_notw ... -n 14) identifies it as a size-14 codelet.
 *
 * ri, ii   input arrays, read at offsets WS(is, k) for k = 0..13
 *          (presumably the real and imaginary parts -- confirm in codelet-dft.h)
 * ro, io   output arrays, written at offsets WS(os, k) for k = 0..13
 * is, os   input/output strides handed to WS()
 * v        number of loop iterations performed
 * ivs, ovs amounts added to the input/output pointers after each iteration
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from the
 * included headers; their definitions are not visible in this file.
 */
Chris@82 37 static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/*
 * NOTE(review): numerically these constants appear to be sin(3*pi/7),
 * sin(2*pi/7)/sin(3*pi/7), sin(pi/7)/sin(2*pi/7), cos(pi/7),
 * cos(2*pi/7)/cos(pi/7) and cos(3*pi/7)/cos(2*pi/7), in declaration order.
 */
Chris@82 39 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@82 40 DK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@82 41 DK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@82 42 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@82 43 DK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@82 44 DK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@82 45 {
Chris@82 46 INT i;
/* One pass per transform: v passes, pointers stepped by ivs / ovs each time. */
Chris@82 47 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) {
Chris@82 48 E T3, Tp, T1b, T1x, T1i, T1L, T1M, T1j, T1k, T1K, Ta, To, Th, Tz, T14;
Chris@82 49 E TZ, Ts, Ty, Tv, T1Z, T2c, T27, TI, T23, T24, TP, TW, T22, T1c, T1e;
Chris@82 50 E T1d, T1f, T1s, T1n, T1A, T1G, T1D, T1H, T1U, T1P;
/* Inputs 0 and 7: difference and sum of the ri pair (T3, Tp) and of the ii pair (T1b, T1x). */
Chris@82 51 {
Chris@82 52 E T1, T2, T19, T1a;
Chris@82 53 T1 = ri[0];
Chris@82 54 T2 = ri[WS(is, 7)];
Chris@82 55 T3 = T1 - T2;
Chris@82 56 Tp = T1 + T2;
Chris@82 57 T19 = ii[0];
Chris@82 58 T1a = ii[WS(is, 7)];
Chris@82 59 T1b = T19 - T1a;
Chris@82 60 T1x = T19 + T1a;
Chris@82 61 }
/*
 * Remaining ri inputs, loaded in pairs whose indices differ by 7
 * (2/9, 12/5, 8/1, 6/13, 10/3, 4/11).  Each pair yields a difference and a
 * sum, which are then combined into the intermediates used by the outputs.
 */
Chris@82 62 {
Chris@82 63 E T6, Tq, T9, Tr, Tn, Tx, Tk, Tw, Tg, Tu, Td, Tt;
Chris@82 64 {
Chris@82 65 E T4, T5, Ti, Tj;
Chris@82 66 T4 = ri[WS(is, 2)];
Chris@82 67 T5 = ri[WS(is, 9)];
Chris@82 68 T6 = T4 - T5;
Chris@82 69 Tq = T4 + T5;
Chris@82 70 {
Chris@82 71 E T7, T8, Tl, Tm;
Chris@82 72 T7 = ri[WS(is, 12)];
Chris@82 73 T8 = ri[WS(is, 5)];
Chris@82 74 T9 = T7 - T8;
Chris@82 75 Tr = T7 + T8;
Chris@82 76 Tl = ri[WS(is, 8)];
Chris@82 77 Tm = ri[WS(is, 1)];
Chris@82 78 Tn = Tl - Tm;
Chris@82 79 Tx = Tl + Tm;
Chris@82 80 }
Chris@82 81 Ti = ri[WS(is, 6)];
Chris@82 82 Tj = ri[WS(is, 13)];
Chris@82 83 Tk = Ti - Tj;
Chris@82 84 Tw = Ti + Tj;
Chris@82 85 {
Chris@82 86 E Te, Tf, Tb, Tc;
Chris@82 87 Te = ri[WS(is, 10)];
Chris@82 88 Tf = ri[WS(is, 3)];
Chris@82 89 Tg = Te - Tf;
Chris@82 90 Tu = Te + Tf;
Chris@82 91 Tb = ri[WS(is, 4)];
Chris@82 92 Tc = ri[WS(is, 11)];
Chris@82 93 Td = Tb - Tc;
Chris@82 94 Tt = Tb + Tc;
Chris@82 95 }
Chris@82 96 }
Chris@82 97 T1i = Tn - Tk;
Chris@82 98 T1L = Tt - Tu;
Chris@82 99 T1M = Tr - Tq;
Chris@82 100 T1j = Tg - Td;
Chris@82 101 T1k = T9 - T6;
Chris@82 102 T1K = Tw - Tx;
Chris@82 103 Ta = T6 + T9;
Chris@82 104 To = Tk + Tn;
Chris@82 105 Th = Td + Tg;
Chris@82 106 Tz = FNMS(KP356895867, Th, Ta);
Chris@82 107 T14 = FNMS(KP356895867, To, Th);
Chris@82 108 TZ = FNMS(KP356895867, Ta, To);
Chris@82 109 Ts = Tq + Tr;
Chris@82 110 Ty = Tw + Tx;
Chris@82 111 Tv = Tt + Tu;
Chris@82 112 T1Z = FNMS(KP356895867, Ts, Ty);
Chris@82 113 T2c = FNMS(KP356895867, Ty, Tv);
Chris@82 114 T27 = FNMS(KP356895867, Tv, Ts);
Chris@82 115 }
/* Same pairing and combination applied to the ii inputs. */
Chris@82 116 {
Chris@82 117 E TE, T1B, TH, T1C, TV, T1F, TS, T1E, TO, T1z, TL, T1y;
Chris@82 118 {
Chris@82 119 E TC, TD, TQ, TR;
Chris@82 120 TC = ii[WS(is, 4)];
Chris@82 121 TD = ii[WS(is, 11)];
Chris@82 122 TE = TC - TD;
Chris@82 123 T1B = TC + TD;
Chris@82 124 {
Chris@82 125 E TF, TG, TT, TU;
Chris@82 126 TF = ii[WS(is, 10)];
Chris@82 127 TG = ii[WS(is, 3)];
Chris@82 128 TH = TF - TG;
Chris@82 129 T1C = TF + TG;
Chris@82 130 TT = ii[WS(is, 8)];
Chris@82 131 TU = ii[WS(is, 1)];
Chris@82 132 TV = TT - TU;
Chris@82 133 T1F = TT + TU;
Chris@82 134 }
Chris@82 135 TQ = ii[WS(is, 6)];
Chris@82 136 TR = ii[WS(is, 13)];
Chris@82 137 TS = TQ - TR;
Chris@82 138 T1E = TQ + TR;
Chris@82 139 {
Chris@82 140 E TM, TN, TJ, TK;
Chris@82 141 TM = ii[WS(is, 12)];
Chris@82 142 TN = ii[WS(is, 5)];
Chris@82 143 TO = TM - TN;
Chris@82 144 T1z = TM + TN;
Chris@82 145 TJ = ii[WS(is, 2)];
Chris@82 146 TK = ii[WS(is, 9)];
Chris@82 147 TL = TJ - TK;
Chris@82 148 T1y = TJ + TK;
Chris@82 149 }
Chris@82 150 }
Chris@82 151 TI = TE - TH;
Chris@82 152 T23 = T1F - T1E;
Chris@82 153 T24 = T1C - T1B;
Chris@82 154 TP = TL - TO;
Chris@82 155 TW = TS - TV;
Chris@82 156 T22 = T1y - T1z;
Chris@82 157 T1c = TL + TO;
Chris@82 158 T1e = TS + TV;
Chris@82 159 T1d = TE + TH;
Chris@82 160 T1f = FNMS(KP356895867, T1e, T1d);
Chris@82 161 T1s = FNMS(KP356895867, T1d, T1c);
Chris@82 162 T1n = FNMS(KP356895867, T1c, T1e);
Chris@82 163 T1A = T1y + T1z;
Chris@82 164 T1G = T1E + T1F;
Chris@82 165 T1D = T1B + T1C;
Chris@82 166 T1H = FNMS(KP356895867, T1G, T1D);
Chris@82 167 T1U = FNMS(KP356895867, T1D, T1A);
Chris@82 168 T1P = FNMS(KP356895867, T1A, T1G);
Chris@82 169 }
/* Output 7 is the plain sum of the difference terms; output 0 the plain sum of the sum terms. */
Chris@82 170 ro[WS(os, 7)] = T3 + Ta + Th + To;
Chris@82 171 io[WS(os, 7)] = T1b + T1c + T1d + T1e;
Chris@82 172 ro[0] = Tp + Ts + Tv + Ty;
Chris@82 173 io[0] = T1x + T1A + T1D + T1G;
/*
 * The other twelve outputs are written two at a time, as index pairs k and
 * 14 - k.  Each pair shares two intermediates that are combined once with
 * FNMS and once with FMA, both scaled by KP974927912.
 */
Chris@82 174 {
Chris@82 175 E TB, TY, TA, TX;
Chris@82 176 TA = FNMS(KP692021471, Tz, To);
Chris@82 177 TB = FNMS(KP900968867, TA, T3);
Chris@82 178 TX = FMA(KP554958132, TW, TP);
Chris@82 179 TY = FMA(KP801937735, TX, TI);
Chris@82 180 ro[WS(os, 13)] = FNMS(KP974927912, TY, TB);
Chris@82 181 ro[WS(os, 1)] = FMA(KP974927912, TY, TB);
Chris@82 182 }
Chris@82 183 {
Chris@82 184 E T1u, T1w, T1t, T1v;
Chris@82 185 T1t = FNMS(KP692021471, T1s, T1e);
Chris@82 186 T1u = FNMS(KP900968867, T1t, T1b);
Chris@82 187 T1v = FMA(KP554958132, T1i, T1k);
Chris@82 188 T1w = FMA(KP801937735, T1v, T1j);
Chris@82 189 io[WS(os, 1)] = FMA(KP974927912, T1w, T1u);
Chris@82 190 io[WS(os, 13)] = FNMS(KP974927912, T1w, T1u);
Chris@82 191 }
Chris@82 192 {
Chris@82 193 E T11, T13, T10, T12;
Chris@82 194 T10 = FNMS(KP692021471, TZ, Th);
Chris@82 195 T11 = FNMS(KP900968867, T10, T3);
Chris@82 196 T12 = FMA(KP554958132, TI, TW);
Chris@82 197 T13 = FNMS(KP801937735, T12, TP);
Chris@82 198 ro[WS(os, 5)] = FNMS(KP974927912, T13, T11);
Chris@82 199 ro[WS(os, 9)] = FMA(KP974927912, T13, T11);
Chris@82 200 }
Chris@82 201 {
Chris@82 202 E T1p, T1r, T1o, T1q;
Chris@82 203 T1o = FNMS(KP692021471, T1n, T1d);
Chris@82 204 T1p = FNMS(KP900968867, T1o, T1b);
Chris@82 205 T1q = FMA(KP554958132, T1j, T1i);
Chris@82 206 T1r = FNMS(KP801937735, T1q, T1k);
Chris@82 207 io[WS(os, 5)] = FNMS(KP974927912, T1r, T1p);
Chris@82 208 io[WS(os, 9)] = FMA(KP974927912, T1r, T1p);
Chris@82 209 }
Chris@82 210 {
Chris@82 211 E T16, T18, T15, T17;
Chris@82 212 T15 = FNMS(KP692021471, T14, Ta);
Chris@82 213 T16 = FNMS(KP900968867, T15, T3);
Chris@82 214 T17 = FNMS(KP554958132, TP, TI);
Chris@82 215 T18 = FNMS(KP801937735, T17, TW);
Chris@82 216 ro[WS(os, 11)] = FNMS(KP974927912, T18, T16);
Chris@82 217 ro[WS(os, 3)] = FMA(KP974927912, T18, T16);
Chris@82 218 }
Chris@82 219 {
Chris@82 220 E T1h, T1m, T1g, T1l;
Chris@82 221 T1g = FNMS(KP692021471, T1f, T1c);
Chris@82 222 T1h = FNMS(KP900968867, T1g, T1b);
Chris@82 223 T1l = FNMS(KP554958132, T1k, T1j);
Chris@82 224 T1m = FNMS(KP801937735, T1l, T1i);
Chris@82 225 io[WS(os, 3)] = FMA(KP974927912, T1m, T1h);
Chris@82 226 io[WS(os, 11)] = FNMS(KP974927912, T1m, T1h);
Chris@82 227 }
Chris@82 228 {
Chris@82 229 E T1J, T1O, T1I, T1N;
Chris@82 230 T1I = FNMS(KP692021471, T1H, T1A);
Chris@82 231 T1J = FNMS(KP900968867, T1I, T1x);
Chris@82 232 T1N = FMA(KP554958132, T1M, T1L);
Chris@82 233 T1O = FNMS(KP801937735, T1N, T1K);
Chris@82 234 io[WS(os, 4)] = FMA(KP974927912, T1O, T1J);
Chris@82 235 io[WS(os, 10)] = FNMS(KP974927912, T1O, T1J);
Chris@82 236 }
Chris@82 237 {
Chris@82 238 E T2e, T2g, T2d, T2f;
Chris@82 239 T2d = FNMS(KP692021471, T2c, Ts);
Chris@82 240 T2e = FNMS(KP900968867, T2d, Tp);
Chris@82 241 T2f = FMA(KP554958132, T22, T24);
Chris@82 242 T2g = FNMS(KP801937735, T2f, T23);
Chris@82 243 ro[WS(os, 10)] = FNMS(KP974927912, T2g, T2e);
Chris@82 244 ro[WS(os, 4)] = FMA(KP974927912, T2g, T2e);
Chris@82 245 }
Chris@82 246 {
Chris@82 247 E T1R, T1T, T1Q, T1S;
Chris@82 248 T1Q = FNMS(KP692021471, T1P, T1D);
Chris@82 249 T1R = FNMS(KP900968867, T1Q, T1x);
Chris@82 250 T1S = FMA(KP554958132, T1L, T1K);
Chris@82 251 T1T = FMA(KP801937735, T1S, T1M);
Chris@82 252 io[WS(os, 2)] = FMA(KP974927912, T1T, T1R);
Chris@82 253 io[WS(os, 12)] = FNMS(KP974927912, T1T, T1R);
Chris@82 254 }
Chris@82 255 {
Chris@82 256 E T21, T26, T20, T25;
Chris@82 257 T20 = FNMS(KP692021471, T1Z, Tv);
Chris@82 258 T21 = FNMS(KP900968867, T20, Tp);
Chris@82 259 T25 = FMA(KP554958132, T24, T23);
Chris@82 260 T26 = FMA(KP801937735, T25, T22);
Chris@82 261 ro[WS(os, 12)] = FNMS(KP974927912, T26, T21);
Chris@82 262 ro[WS(os, 2)] = FMA(KP974927912, T26, T21);
Chris@82 263 }
Chris@82 264 {
Chris@82 265 E T1W, T1Y, T1V, T1X;
Chris@82 266 T1V = FNMS(KP692021471, T1U, T1G);
Chris@82 267 T1W = FNMS(KP900968867, T1V, T1x);
Chris@82 268 T1X = FNMS(KP554958132, T1K, T1M);
Chris@82 269 T1Y = FNMS(KP801937735, T1X, T1L);
Chris@82 270 io[WS(os, 6)] = FMA(KP974927912, T1Y, T1W);
Chris@82 271 io[WS(os, 8)] = FNMS(KP974927912, T1Y, T1W);
Chris@82 272 }
Chris@82 273 {
Chris@82 274 E T29, T2b, T28, T2a;
Chris@82 275 T28 = FNMS(KP692021471, T27, Ty);
Chris@82 276 T29 = FNMS(KP900968867, T28, Tp);
Chris@82 277 T2a = FNMS(KP554958132, T23, T22);
Chris@82 278 T2b = FNMS(KP801937735, T2a, T24);
Chris@82 279 ro[WS(os, 8)] = FNMS(KP974927912, T2b, T29);
Chris@82 280 ro[WS(os, 6)] = FMA(KP974927912, T2b, T29);
Chris@82 281 }
Chris@82 282 }
Chris@82 283 }
Chris@82 284 }
Chris@82 285
/*
 * Descriptor handed to the planner for the FMA variant: size 14, name
 * "n1_14".  The first three counts {64, 0, 84} match the generator summary
 * for this variant (64 additions, 0 multiplications, 84 fused multiply/adds).
 * NOTE(review): the meaning of the fourth count and of the trailing zero
 * fields is defined by kdft_desc, which is not visible here.
 */
Chris@82 286 static const kdft_desc desc = { 14, "n1_14", {64, 0, 84, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 287
/*
 * Registration entry point (FMA variant): passes the n1_14 kernel and its
 * descriptor to kdft_register for planner p.  X() is a name-wrapping macro
 * defined elsewhere.
 */
Chris@82 288 void X(codelet_n1_14) (planner *p) {
Chris@82 289 X(kdft_register) (p, n1_14, &desc);
Chris@82 290 }
Chris@82 291
Chris@82 292 #else
Chris@82 293
Chris@82 294 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include dft/scalar/n.h */
Chris@82 295
Chris@82 296 /*
Chris@82 297 * This function contains 148 FP additions, 72 FP multiplications,
Chris@82 298 * (or, 100 additions, 24 multiplications, 48 fused multiply/add),
Chris@82 299 * 43 stack variables, 6 constants, and 56 memory accesses
Chris@82 300 */
Chris@82 301 #include "dft/scalar/n.h"
Chris@82 302
/*
 * n1_14, non-FMA variant: this definition is selected when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.  The generator
 * command line recorded in this file (gen_notw ... -n 14) identifies it as a
 * size-14 codelet.
 *
 * ri, ii   input arrays, read at offsets WS(is, k) for k = 0..13
 *          (presumably the real and imaginary parts -- confirm in codelet-dft.h)
 * ro, io   output arrays, written at offsets WS(os, k) for k = 0..13
 * is, os   input/output strides handed to WS()
 * v        number of loop iterations performed
 * ivs, ovs amounts added to the input/output pointers after each iteration
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS, FNMA and MAKE_VOLATILE_STRIDE come
 * from the included headers; their definitions are not visible in this file.
 */
Chris@82 303 static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 304 {
/*
 * NOTE(review): numerically these constants appear to be cos(3*pi/7),
 * cos(pi/7), cos(2*pi/7), sin(pi/7), sin(2*pi/7) and sin(3*pi/7), in
 * declaration order.
 */
Chris@82 305 DK(KP222520933, +0.222520933956314404288902564496794759466355569);
Chris@82 306 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@82 307 DK(KP623489801, +0.623489801858733530525004884004239810632274731);
Chris@82 308 DK(KP433883739, +0.433883739117558120475768332848358754609990728);
Chris@82 309 DK(KP781831482, +0.781831482468029808708444526674057750232334519);
Chris@82 310 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@82 311 {
Chris@82 312 INT i;
/* One pass per transform: v passes, pointers stepped by ivs / ovs each time. */
Chris@82 313 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) {
Chris@82 314 E T3, Tp, T16, T1f, Ta, T1q, Ts, T10, TG, T1z, T19, T1i, Th, T1s, Tv;
Chris@82 315 E T12, TU, T1B, T17, T1o, To, T1r, Ty, T11, TN, T1A, T18, T1l;
/* Inputs 0 and 7: difference and sum of the ri pair (T3, Tp) and of the ii pair (T16, T1f). */
Chris@82 316 {
Chris@82 317 E T1, T2, T14, T15;
Chris@82 318 T1 = ri[0];
Chris@82 319 T2 = ri[WS(is, 7)];
Chris@82 320 T3 = T1 - T2;
Chris@82 321 Tp = T1 + T2;
Chris@82 322 T14 = ii[0];
Chris@82 323 T15 = ii[WS(is, 7)];
Chris@82 324 T16 = T14 - T15;
Chris@82 325 T1f = T14 + T15;
Chris@82 326 }
/*
 * The six blocks below each load four inputs as two pairs whose indices
 * differ by 7, form the difference and sum of each pair, and then combine
 * the two differences and the two sums.  Blocks alternate between ri and ii:
 * inputs 2/9 with 12/5, then 4/11 with 10/3, then 6/13 with 8/1.
 */
Chris@82 327 {
Chris@82 328 E T6, Tq, T9, Tr;
Chris@82 329 {
Chris@82 330 E T4, T5, T7, T8;
Chris@82 331 T4 = ri[WS(is, 2)];
Chris@82 332 T5 = ri[WS(is, 9)];
Chris@82 333 T6 = T4 - T5;
Chris@82 334 Tq = T4 + T5;
Chris@82 335 T7 = ri[WS(is, 12)];
Chris@82 336 T8 = ri[WS(is, 5)];
Chris@82 337 T9 = T7 - T8;
Chris@82 338 Tr = T7 + T8;
Chris@82 339 }
Chris@82 340 Ta = T6 + T9;
Chris@82 341 T1q = Tr - Tq;
Chris@82 342 Ts = Tq + Tr;
Chris@82 343 T10 = T9 - T6;
Chris@82 344 }
Chris@82 345 {
Chris@82 346 E TC, T1g, TF, T1h;
Chris@82 347 {
Chris@82 348 E TA, TB, TD, TE;
Chris@82 349 TA = ii[WS(is, 2)];
Chris@82 350 TB = ii[WS(is, 9)];
Chris@82 351 TC = TA - TB;
Chris@82 352 T1g = TA + TB;
Chris@82 353 TD = ii[WS(is, 12)];
Chris@82 354 TE = ii[WS(is, 5)];
Chris@82 355 TF = TD - TE;
Chris@82 356 T1h = TD + TE;
Chris@82 357 }
Chris@82 358 TG = TC - TF;
Chris@82 359 T1z = T1g - T1h;
Chris@82 360 T19 = TC + TF;
Chris@82 361 T1i = T1g + T1h;
Chris@82 362 }
Chris@82 363 {
Chris@82 364 E Td, Tt, Tg, Tu;
Chris@82 365 {
Chris@82 366 E Tb, Tc, Te, Tf;
Chris@82 367 Tb = ri[WS(is, 4)];
Chris@82 368 Tc = ri[WS(is, 11)];
Chris@82 369 Td = Tb - Tc;
Chris@82 370 Tt = Tb + Tc;
Chris@82 371 Te = ri[WS(is, 10)];
Chris@82 372 Tf = ri[WS(is, 3)];
Chris@82 373 Tg = Te - Tf;
Chris@82 374 Tu = Te + Tf;
Chris@82 375 }
Chris@82 376 Th = Td + Tg;
Chris@82 377 T1s = Tt - Tu;
Chris@82 378 Tv = Tt + Tu;
Chris@82 379 T12 = Tg - Td;
Chris@82 380 }
Chris@82 381 {
Chris@82 382 E TQ, T1m, TT, T1n;
Chris@82 383 {
Chris@82 384 E TO, TP, TR, TS;
Chris@82 385 TO = ii[WS(is, 4)];
Chris@82 386 TP = ii[WS(is, 11)];
Chris@82 387 TQ = TO - TP;
Chris@82 388 T1m = TO + TP;
Chris@82 389 TR = ii[WS(is, 10)];
Chris@82 390 TS = ii[WS(is, 3)];
Chris@82 391 TT = TR - TS;
Chris@82 392 T1n = TR + TS;
Chris@82 393 }
Chris@82 394 TU = TQ - TT;
Chris@82 395 T1B = T1n - T1m;
Chris@82 396 T17 = TQ + TT;
Chris@82 397 T1o = T1m + T1n;
Chris@82 398 }
Chris@82 399 {
Chris@82 400 E Tk, Tw, Tn, Tx;
Chris@82 401 {
Chris@82 402 E Ti, Tj, Tl, Tm;
Chris@82 403 Ti = ri[WS(is, 6)];
Chris@82 404 Tj = ri[WS(is, 13)];
Chris@82 405 Tk = Ti - Tj;
Chris@82 406 Tw = Ti + Tj;
Chris@82 407 Tl = ri[WS(is, 8)];
Chris@82 408 Tm = ri[WS(is, 1)];
Chris@82 409 Tn = Tl - Tm;
Chris@82 410 Tx = Tl + Tm;
Chris@82 411 }
Chris@82 412 To = Tk + Tn;
Chris@82 413 T1r = Tw - Tx;
Chris@82 414 Ty = Tw + Tx;
Chris@82 415 T11 = Tn - Tk;
Chris@82 416 }
Chris@82 417 {
Chris@82 418 E TJ, T1j, TM, T1k;
Chris@82 419 {
Chris@82 420 E TH, TI, TK, TL;
Chris@82 421 TH = ii[WS(is, 6)];
Chris@82 422 TI = ii[WS(is, 13)];
Chris@82 423 TJ = TH - TI;
Chris@82 424 T1j = TH + TI;
Chris@82 425 TK = ii[WS(is, 8)];
Chris@82 426 TL = ii[WS(is, 1)];
Chris@82 427 TM = TK - TL;
Chris@82 428 T1k = TK + TL;
Chris@82 429 }
Chris@82 430 TN = TJ - TM;
Chris@82 431 T1A = T1k - T1j;
Chris@82 432 T18 = TJ + TM;
Chris@82 433 T1l = T1j + T1k;
Chris@82 434 }
/* Output 7 is the plain sum of the difference terms; output 0 the plain sum of the sum terms. */
Chris@82 435 ro[WS(os, 7)] = T3 + Ta + Th + To;
Chris@82 436 io[WS(os, 7)] = T16 + T19 + T17 + T18;
Chris@82 437 ro[0] = Tp + Ts + Tv + Ty;
Chris@82 438 io[0] = T1f + T1i + T1o + T1l;
/*
 * The other twelve outputs are written two at a time, as index pairs k and
 * 14 - k: each pair is the sum and the difference of the same two
 * intermediates.  Every block below handles one ro pair and one io pair.
 */
Chris@82 439 {
Chris@82 440 E TV, Tz, T1e, T1d;
Chris@82 441 TV = FNMS(KP781831482, TN, KP974927912 * TG) - (KP433883739 * TU);
Chris@82 442 Tz = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta);
Chris@82 443 ro[WS(os, 5)] = Tz - TV;
Chris@82 444 ro[WS(os, 9)] = Tz + TV;
Chris@82 445 T1e = FNMS(KP781831482, T11, KP974927912 * T10) - (KP433883739 * T12);
Chris@82 446 T1d = FMA(KP623489801, T18, T16) + FNMA(KP900968867, T17, KP222520933 * T19);
Chris@82 447 io[WS(os, 5)] = T1d - T1e;
Chris@82 448 io[WS(os, 9)] = T1e + T1d;
Chris@82 449 }
Chris@82 450 {
Chris@82 451 E TX, TW, T1b, T1c;
Chris@82 452 TX = FMA(KP781831482, TG, KP974927912 * TU) + (KP433883739 * TN);
Chris@82 453 TW = FMA(KP623489801, Ta, T3) + FNMA(KP900968867, To, KP222520933 * Th);
Chris@82 454 ro[WS(os, 13)] = TW - TX;
Chris@82 455 ro[WS(os, 1)] = TW + TX;
Chris@82 456 T1b = FMA(KP781831482, T10, KP974927912 * T12) + (KP433883739 * T11);
Chris@82 457 T1c = FMA(KP623489801, T19, T16) + FNMA(KP900968867, T18, KP222520933 * T17);
Chris@82 458 io[WS(os, 1)] = T1b + T1c;
Chris@82 459 io[WS(os, 13)] = T1c - T1b;
Chris@82 460 }
Chris@82 461 {
Chris@82 462 E TZ, TY, T13, T1a;
Chris@82 463 TZ = FMA(KP433883739, TG, KP974927912 * TN) - (KP781831482 * TU);
Chris@82 464 TY = FMA(KP623489801, Th, T3) + FNMA(KP222520933, To, KP900968867 * Ta);
Chris@82 465 ro[WS(os, 11)] = TY - TZ;
Chris@82 466 ro[WS(os, 3)] = TY + TZ;
Chris@82 467 T13 = FMA(KP433883739, T10, KP974927912 * T11) - (KP781831482 * T12);
Chris@82 468 T1a = FMA(KP623489801, T17, T16) + FNMA(KP222520933, T18, KP900968867 * T19);
Chris@82 469 io[WS(os, 3)] = T13 + T1a;
Chris@82 470 io[WS(os, 11)] = T1a - T13;
Chris@82 471 }
Chris@82 472 {
Chris@82 473 E T1t, T1p, T1C, T1y;
Chris@82 474 T1t = FNMS(KP433883739, T1r, KP781831482 * T1q) - (KP974927912 * T1s);
Chris@82 475 T1p = FMA(KP623489801, T1i, T1f) + FNMA(KP900968867, T1l, KP222520933 * T1o);
Chris@82 476 io[WS(os, 6)] = T1p - T1t;
Chris@82 477 io[WS(os, 8)] = T1t + T1p;
Chris@82 478 T1C = FNMS(KP433883739, T1A, KP781831482 * T1z) - (KP974927912 * T1B);
Chris@82 479 T1y = FMA(KP623489801, Ts, Tp) + FNMA(KP900968867, Ty, KP222520933 * Tv);
Chris@82 480 ro[WS(os, 6)] = T1y - T1C;
Chris@82 481 ro[WS(os, 8)] = T1y + T1C;
Chris@82 482 }
Chris@82 483 {
Chris@82 484 E T1v, T1u, T1E, T1D;
Chris@82 485 T1v = FMA(KP433883739, T1q, KP781831482 * T1s) - (KP974927912 * T1r);
Chris@82 486 T1u = FMA(KP623489801, T1o, T1f) + FNMA(KP222520933, T1l, KP900968867 * T1i);
Chris@82 487 io[WS(os, 4)] = T1u - T1v;
Chris@82 488 io[WS(os, 10)] = T1v + T1u;
Chris@82 489 T1E = FMA(KP433883739, T1z, KP781831482 * T1B) - (KP974927912 * T1A);
Chris@82 490 T1D = FMA(KP623489801, Tv, Tp) + FNMA(KP222520933, Ty, KP900968867 * Ts);
Chris@82 491 ro[WS(os, 4)] = T1D - T1E;
Chris@82 492 ro[WS(os, 10)] = T1D + T1E;
Chris@82 493 }
Chris@82 494 {
Chris@82 495 E T1w, T1x, T1G, T1F;
Chris@82 496 T1w = FMA(KP974927912, T1q, KP433883739 * T1s) + (KP781831482 * T1r);
Chris@82 497 T1x = FMA(KP623489801, T1l, T1f) + FNMA(KP900968867, T1o, KP222520933 * T1i);
Chris@82 498 io[WS(os, 2)] = T1w + T1x;
Chris@82 499 io[WS(os, 12)] = T1x - T1w;
Chris@82 500 T1G = FMA(KP974927912, T1z, KP433883739 * T1B) + (KP781831482 * T1A);
Chris@82 501 T1F = FMA(KP623489801, Ty, Tp) + FNMA(KP900968867, Tv, KP222520933 * Ts);
Chris@82 502 ro[WS(os, 12)] = T1F - T1G;
Chris@82 503 ro[WS(os, 2)] = T1F + T1G;
Chris@82 504 }
Chris@82 505 }
Chris@82 506 }
Chris@82 507 }
Chris@82 508
/*
 * Descriptor handed to the planner for the non-FMA variant: size 14, name
 * "n1_14".  The first three counts {100, 24, 48} match the generator summary
 * for this variant (100 additions, 24 multiplications, 48 fused
 * multiply/adds).
 * NOTE(review): the meaning of the fourth count and of the trailing zero
 * fields is defined by kdft_desc, which is not visible here.
 */
Chris@82 509 static const kdft_desc desc = { 14, "n1_14", {100, 24, 48, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 510
/*
 * Registration entry point (non-FMA variant): passes the n1_14 kernel and
 * its descriptor to kdft_register for planner p.  X() is a name-wrapping
 * macro defined elsewhere.
 */
Chris@82 511 void X(codelet_n1_14) (planner *p) {
Chris@82 512 X(kdft_register) (p, n1_14, &desc);
Chris@82 513 }
Chris@82 514
Chris@82 515 #endif