annotate src/fftw-3.3.5/dft/scalar/codelets/n1_14.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:35:53 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include n.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 148 FP additions, 84 FP multiplications,
Chris@42 32 * (or, 64 additions, 0 multiplications, 84 fused multiply/add),
Chris@42 33 * 80 stack variables, 6 constants, and 56 memory accesses
Chris@42 34 */
Chris@42 35 #include "n.h"
Chris@42 36
/*
 * n1_14 (HAVE_FMA variant): straight-line kernel emitted by genfft's
 * gen_notw with "-n 14 -name n1_14" (see the "Generated by" comment above).
 *
 * Parameters, as the body uses them:
 *   ri, ii   - input arrays, each read at offsets WS(is, 0) .. WS(is, 13)
 *              (presumably real and imaginary parts; naming only - confirm).
 *   ro, io   - output arrays, each written at offsets WS(os, 0) .. WS(os, 13).
 *   is, os   - input and output strides handed to WS().
 *   v        - number of loop iterations; nothing is done when v <= 0.
 *   ivs, ovs - added to ri/ii and to ro/io respectively after each iteration.
 *
 * Every input is loaded once and every output is stored once per iteration.
 */
Chris@42 37 static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 38 {
Chris@42 39 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@42 40 DK(KP801937735, +0.801937735804838252472204639014890102331838324);
Chris@42 41 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@42 42 DK(KP554958132, +0.554958132087371191422194871006410481067288862);
Chris@42 43 DK(KP692021471, +0.692021471630095869627814897002069140197260599);
Chris@42 44 DK(KP356895867, +0.356895867892209443894399510021300583399127187);
Chris@42 45 {
Chris@42 46 INT i;
/* One pass per iteration: count i down from v, advancing the input
 * pointers by ivs and the output pointers by ovs each time. */
Chris@42 47 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) {
/* These temporaries are declared at loop scope because they are assigned
 * inside the nested blocks and consumed by the last four stores below. */
Chris@42 48 E Tp, T1L, T24, T1W, T1X, T28, T2a, T1Y, T29, T2b;
Chris@42 49 {
Chris@42 50 E T3, T1x, T1b, To, T1i, T1M, Ts, Ta, T1k, Tv, Th, T1j, T1K, Ty, TZ;
Chris@42 51 E T14, Tz, T1Z, T27, T2c, T1d, TI, T23, T1G, T1D, TW, T1e, T22, T1A, TP;
Chris@42 52 E T1c, T1n, T1s, T1f, T1P;
Chris@42 53 {
Chris@42 54 E T1, T2, T19, T1a;
/* Inputs 0 and 7 from both arrays; their sums and differences are formed
 * a few statements later, interleaved with further loads. */
Chris@42 55 T1 = ri[0];
Chris@42 56 T2 = ri[WS(is, 7)];
Chris@42 57 T19 = ii[0];
Chris@42 58 T1a = ii[WS(is, 7)];
Chris@42 59 {
Chris@42 60 E Tq, T6, Tr, T9, Te, Tx, Tn, Tw, Tk, Tf, Tb, Tc;
Chris@42 61 {
Chris@42 62 E Tl, Tm, Ti, Tj;
Chris@42 63 {
Chris@42 64 E T4, T5, T7, T8;
/* Remaining ri loads. Each element is combined (sum and difference) with
 * the element 7 positions away modulo 14: 2/9, 12/5, 8/1, 6/13, 10/3, 4/11. */
Chris@42 65 T4 = ri[WS(is, 2)];
Chris@42 66 Tp = T1 + T2;
Chris@42 67 T3 = T1 - T2;
Chris@42 68 T1x = T19 + T1a;
Chris@42 69 T1b = T19 - T1a;
Chris@42 70 T5 = ri[WS(is, 9)];
Chris@42 71 T7 = ri[WS(is, 12)];
Chris@42 72 T8 = ri[WS(is, 5)];
Chris@42 73 Tl = ri[WS(is, 8)];
Chris@42 74 Tq = T4 + T5;
Chris@42 75 T6 = T4 - T5;
Chris@42 76 Tr = T7 + T8;
Chris@42 77 T9 = T7 - T8;
Chris@42 78 Tm = ri[WS(is, 1)];
Chris@42 79 }
Chris@42 80 Ti = ri[WS(is, 6)];
Chris@42 81 Tj = ri[WS(is, 13)];
Chris@42 82 Te = ri[WS(is, 10)];
Chris@42 83 Tx = Tl + Tm;
Chris@42 84 Tn = Tl - Tm;
Chris@42 85 Tw = Ti + Tj;
Chris@42 86 Tk = Ti - Tj;
Chris@42 87 Tf = ri[WS(is, 3)];
Chris@42 88 Tb = ri[WS(is, 4)];
Chris@42 89 Tc = ri[WS(is, 11)];
Chris@42 90 }
Chris@42 91 {
Chris@42 92 E Tu, Tg, Tt, Td;
Chris@42 93 To = Tk + Tn;
Chris@42 94 T1i = Tn - Tk;
Chris@42 95 Tu = Te + Tf;
Chris@42 96 Tg = Te - Tf;
Chris@42 97 Tt = Tb + Tc;
Chris@42 98 Td = Tb - Tc;
Chris@42 99 T1M = Tr - Tq;
Chris@42 100 Ts = Tq + Tr;
Chris@42 101 Ta = T6 + T9;
Chris@42 102 T1k = T9 - T6;
Chris@42 103 T1L = Tt - Tu;
Chris@42 104 Tv = Tt + Tu;
Chris@42 105 Th = Td + Tg;
Chris@42 106 T1j = Tg - Td;
Chris@42 107 T1K = Tw - Tx;
Chris@42 108 Ty = Tw + Tx;
Chris@42 109 TZ = FNMS(KP356895867, Ta, To);
Chris@42 110 T14 = FNMS(KP356895867, To, Th);
Chris@42 111 Tz = FNMS(KP356895867, Th, Ta);
Chris@42 112 T1Z = FNMS(KP356895867, Ts, Ty);
Chris@42 113 }
Chris@42 114 }
Chris@42 115 {
Chris@42 116 E T1B, TE, T1C, TH, T1F, TV, TJ, T1E, TS, T1z, TO, TK, T1y, TL;
Chris@42 117 {
Chris@42 118 E TF, TG, TT, TU, TC, TD;
/* Remaining ii loads, paired the same way as the ri loads above. */
Chris@42 119 TC = ii[WS(is, 4)];
Chris@42 120 TD = ii[WS(is, 11)];
Chris@42 121 T27 = FNMS(KP356895867, Tv, Ts);
Chris@42 122 T2c = FNMS(KP356895867, Ty, Tv);
Chris@42 123 TF = ii[WS(is, 10)];
Chris@42 124 T1B = TC + TD;
Chris@42 125 TE = TC - TD;
Chris@42 126 TG = ii[WS(is, 3)];
Chris@42 127 TT = ii[WS(is, 8)];
Chris@42 128 TU = ii[WS(is, 1)];
Chris@42 129 {
Chris@42 130 E TQ, TR, TM, TN;
Chris@42 131 TQ = ii[WS(is, 6)];
Chris@42 132 T1C = TF + TG;
Chris@42 133 TH = TF - TG;
Chris@42 134 T1F = TT + TU;
Chris@42 135 TV = TT - TU;
Chris@42 136 TR = ii[WS(is, 13)];
Chris@42 137 TM = ii[WS(is, 12)];
Chris@42 138 TN = ii[WS(is, 5)];
Chris@42 139 TJ = ii[WS(is, 2)];
Chris@42 140 T1E = TQ + TR;
Chris@42 141 TS = TQ - TR;
Chris@42 142 T1z = TM + TN;
Chris@42 143 TO = TM - TN;
Chris@42 144 TK = ii[WS(is, 9)];
Chris@42 145 }
Chris@42 146 }
Chris@42 147 T1d = TE + TH;
Chris@42 148 TI = TE - TH;
Chris@42 149 T23 = T1F - T1E;
Chris@42 150 T1G = T1E + T1F;
Chris@42 151 T1D = T1B + T1C;
Chris@42 152 T24 = T1C - T1B;
Chris@42 153 T1y = TJ + TK;
Chris@42 154 TL = TJ - TK;
Chris@42 155 TW = TS - TV;
Chris@42 156 T1e = TS + TV;
Chris@42 157 T22 = T1y - T1z;
Chris@42 158 T1A = T1y + T1z;
Chris@42 159 TP = TL - TO;
Chris@42 160 T1c = TL + TO;
Chris@42 161 T1n = FNMS(KP356895867, T1c, T1e);
Chris@42 162 T1s = FNMS(KP356895867, T1d, T1c);
Chris@42 163 T1f = FNMS(KP356895867, T1e, T1d);
Chris@42 164 T1P = FNMS(KP356895867, T1A, T1G);
Chris@42 165 }
Chris@42 166 }
Chris@42 167 {
Chris@42 168 E T1U, T1H, T11, T12, T1o, T1q;
/* Outputs 7 and 0 are plain four-term sums: index 7 uses the pairwise
 * differences, index 0 the pairwise sums. */
Chris@42 169 ro[WS(os, 7)] = T3 + Ta + Th + To;
Chris@42 170 io[WS(os, 7)] = T1b + T1c + T1d + T1e;
Chris@42 171 T1U = FNMS(KP356895867, T1D, T1A);
Chris@42 172 T1H = FNMS(KP356895867, T1G, T1D);
Chris@42 173 ro[0] = Tp + Ts + Tv + Ty;
Chris@42 174 io[0] = T1x + T1A + T1D + T1G;
/* The remaining outputs are stored in pairs k and 14 - k, each pair being
 * FMA / FNMS of KP974927912 applied to the same two operands. */
Chris@42 175 {
Chris@42 176 E TB, TY, T1u, T1w, T10;
Chris@42 177 {
Chris@42 178 E TA, TX, T1t, T1v;
Chris@42 179 TA = FNMS(KP692021471, Tz, To);
Chris@42 180 TX = FMA(KP554958132, TW, TP);
Chris@42 181 T1t = FNMS(KP692021471, T1s, T1e);
Chris@42 182 T1v = FMA(KP554958132, T1i, T1k);
Chris@42 183 TB = FNMS(KP900968867, TA, T3);
Chris@42 184 TY = FMA(KP801937735, TX, TI);
Chris@42 185 T1u = FNMS(KP900968867, T1t, T1b);
Chris@42 186 T1w = FMA(KP801937735, T1v, T1j);
Chris@42 187 }
Chris@42 188 T10 = FNMS(KP692021471, TZ, Th);
Chris@42 189 ro[WS(os, 1)] = FMA(KP974927912, TY, TB);
Chris@42 190 ro[WS(os, 13)] = FNMS(KP974927912, TY, TB);
Chris@42 191 io[WS(os, 13)] = FNMS(KP974927912, T1w, T1u);
Chris@42 192 io[WS(os, 1)] = FMA(KP974927912, T1w, T1u);
Chris@42 193 T11 = FNMS(KP900968867, T10, T3);
Chris@42 194 T12 = FMA(KP554958132, TI, TW);
Chris@42 195 T1o = FNMS(KP692021471, T1n, T1d);
Chris@42 196 T1q = FMA(KP554958132, T1j, T1i);
Chris@42 197 }
Chris@42 198 {
Chris@42 199 E T1J, T1N, T2d, T2f;
Chris@42 200 {
Chris@42 201 E T16, T17, T1g, T1l;
Chris@42 202 {
Chris@42 203 E T13, T1p, T1r, T15;
Chris@42 204 T15 = FNMS(KP692021471, T14, Ta);
Chris@42 205 T13 = FNMS(KP801937735, T12, TP);
Chris@42 206 T1p = FNMS(KP900968867, T1o, T1b);
Chris@42 207 T1r = FNMS(KP801937735, T1q, T1k);
Chris@42 208 T16 = FNMS(KP900968867, T15, T3);
Chris@42 209 ro[WS(os, 9)] = FMA(KP974927912, T13, T11);
Chris@42 210 ro[WS(os, 5)] = FNMS(KP974927912, T13, T11);
Chris@42 211 io[WS(os, 9)] = FMA(KP974927912, T1r, T1p);
Chris@42 212 io[WS(os, 5)] = FNMS(KP974927912, T1r, T1p);
Chris@42 213 T17 = FNMS(KP554958132, TP, TI);
Chris@42 214 }
Chris@42 215 T1g = FNMS(KP692021471, T1f, T1c);
Chris@42 216 T1l = FNMS(KP554958132, T1k, T1j);
Chris@42 217 {
Chris@42 218 E T18, T1h, T1m, T1I;
Chris@42 219 T1I = FNMS(KP692021471, T1H, T1A);
Chris@42 220 T18 = FNMS(KP801937735, T17, TW);
Chris@42 221 T1h = FNMS(KP900968867, T1g, T1b);
Chris@42 222 T1m = FNMS(KP801937735, T1l, T1i);
Chris@42 223 T1J = FNMS(KP900968867, T1I, T1x);
Chris@42 224 ro[WS(os, 3)] = FMA(KP974927912, T18, T16);
Chris@42 225 ro[WS(os, 11)] = FNMS(KP974927912, T18, T16);
Chris@42 226 io[WS(os, 11)] = FNMS(KP974927912, T1m, T1h);
Chris@42 227 io[WS(os, 3)] = FMA(KP974927912, T1m, T1h);
Chris@42 228 T1N = FMA(KP554958132, T1M, T1L);
Chris@42 229 }
Chris@42 230 T2d = FNMS(KP692021471, T2c, Ts);
Chris@42 231 T2f = FMA(KP554958132, T22, T24);
Chris@42 232 }
Chris@42 233 {
Chris@42 234 E T1R, T1S, T20, T25;
Chris@42 235 {
Chris@42 236 E T1O, T2e, T2g, T1Q;
Chris@42 237 T1Q = FNMS(KP692021471, T1P, T1D);
Chris@42 238 T1O = FNMS(KP801937735, T1N, T1K);
Chris@42 239 T2e = FNMS(KP900968867, T2d, Tp);
Chris@42 240 T2g = FNMS(KP801937735, T2f, T23);
Chris@42 241 T1R = FNMS(KP900968867, T1Q, T1x);
Chris@42 242 io[WS(os, 10)] = FNMS(KP974927912, T1O, T1J);
Chris@42 243 io[WS(os, 4)] = FMA(KP974927912, T1O, T1J);
Chris@42 244 ro[WS(os, 4)] = FMA(KP974927912, T2g, T2e);
Chris@42 245 ro[WS(os, 10)] = FNMS(KP974927912, T2g, T2e);
Chris@42 246 T1S = FMA(KP554958132, T1L, T1K);
Chris@42 247 }
Chris@42 248 T20 = FNMS(KP692021471, T1Z, Tv);
Chris@42 249 T25 = FMA(KP554958132, T24, T23);
Chris@42 250 {
Chris@42 251 E T1T, T21, T26, T1V;
Chris@42 252 T1V = FNMS(KP692021471, T1U, T1G);
Chris@42 253 T1T = FMA(KP801937735, T1S, T1M);
Chris@42 254 T21 = FNMS(KP900968867, T20, Tp);
Chris@42 255 T26 = FMA(KP801937735, T25, T22);
Chris@42 256 T1W = FNMS(KP900968867, T1V, T1x);
Chris@42 257 io[WS(os, 12)] = FNMS(KP974927912, T1T, T1R);
Chris@42 258 io[WS(os, 2)] = FMA(KP974927912, T1T, T1R);
Chris@42 259 ro[WS(os, 2)] = FMA(KP974927912, T26, T21);
Chris@42 260 ro[WS(os, 12)] = FNMS(KP974927912, T26, T21);
Chris@42 261 T1X = FNMS(KP554958132, T1K, T1M);
Chris@42 262 }
Chris@42 263 T28 = FNMS(KP692021471, T27, Ty);
Chris@42 264 T2a = FNMS(KP554958132, T23, T22);
Chris@42 265 }
Chris@42 266 }
Chris@42 267 }
Chris@42 268 }
/* Last output pair (indices 6 and 8), finished at loop scope from the
 * temporaries declared at the top of the loop body. */
Chris@42 269 T1Y = FNMS(KP801937735, T1X, T1L);
Chris@42 270 T29 = FNMS(KP900968867, T28, Tp);
Chris@42 271 T2b = FNMS(KP801937735, T2a, T24);
Chris@42 272 io[WS(os, 8)] = FNMS(KP974927912, T1Y, T1W);
Chris@42 273 io[WS(os, 6)] = FMA(KP974927912, T1Y, T1W);
Chris@42 274 ro[WS(os, 6)] = FMA(KP974927912, T2b, T29);
Chris@42 275 ro[WS(os, 8)] = FNMS(KP974927912, T2b, T29);
Chris@42 276 }
Chris@42 277 }
Chris@42 278 }
Chris@42 279
/* Descriptor passed to X(kdft_register) for the HAVE_FMA kernel. The leading
 * 14 and "n1_14" echo the generator's -n and -name options; {64, 0, 84, 0}
 * starts with the add / multiply / fused multiply-add counts quoted in this
 * variant's header comment. The meaning of the remaining fields is not
 * visible in this file. */
Chris@42 280 static const kdft_desc desc = { 14, "n1_14", {64, 0, 84, 0}, &GENUS, 0, 0, 0, 0 };
Chris@42 281
/* Registration entry point for the HAVE_FMA build: hands the n1_14 kernel
 * and its descriptor to X(kdft_register) for planner p. */
Chris@42 282 void X(codelet_n1_14) (planner *p) {
Chris@42 283 X(kdft_register) (p, n1_14, &desc);
Chris@42 284 }
Chris@42 285
Chris@42 286 #else /* HAVE_FMA */
Chris@42 287
Chris@42 288 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include n.h */
Chris@42 289
Chris@42 290 /*
Chris@42 291 * This function contains 148 FP additions, 72 FP multiplications,
Chris@42 292 * (or, 100 additions, 24 multiplications, 48 fused multiply/add),
Chris@42 293 * 43 stack variables, 6 constants, and 56 memory accesses
Chris@42 294 */
Chris@42 295 #include "n.h"
Chris@42 296
/*
 * n1_14 (variant built when HAVE_FMA is not defined): straight-line kernel
 * emitted by genfft's gen_notw with "-n 14 -name n1_14" (see the
 * "Generated by" comment above).
 *
 * Parameters, as the body uses them:
 *   ri, ii   - input arrays, each read at offsets WS(is, 0) .. WS(is, 13)
 *              (presumably real and imaginary parts; naming only - confirm).
 *   ro, io   - output arrays, each written at offsets WS(os, 0) .. WS(os, 13).
 *   is, os   - input and output strides handed to WS().
 *   v        - number of loop iterations; nothing is done when v <= 0.
 *   ivs, ovs - added to ri/ii and to ro/io respectively after each iteration.
 *
 * Every input is loaded once and every output is stored once per iteration.
 */
Chris@42 297 static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 298 {
/* Numerically these six values match the cosines (0.9009.., 0.6234..,
 * 0.2225..) and sines (0.4338.., 0.7818.., 0.9749..) of pi/7, 2pi/7, 3pi/7. */
Chris@42 299 DK(KP222520933, +0.222520933956314404288902564496794759466355569);
Chris@42 300 DK(KP900968867, +0.900968867902419126236102319507445051165919162);
Chris@42 301 DK(KP623489801, +0.623489801858733530525004884004239810632274731);
Chris@42 302 DK(KP433883739, +0.433883739117558120475768332848358754609990728);
Chris@42 303 DK(KP781831482, +0.781831482468029808708444526674057750232334519);
Chris@42 304 DK(KP974927912, +0.974927912181823607018131682993931217232785801);
Chris@42 305 {
Chris@42 306 INT i;
/* One pass per iteration: count i down from v, advancing the input
 * pointers by ivs and the output pointers by ovs each time. */
Chris@42 307 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) {
Chris@42 308 E T3, Tp, T16, T1f, Ta, T1q, Ts, T10, TG, T1z, T19, T1i, Th, T1s, Tv;
Chris@42 309 E T12, TU, T1B, T17, T1o, To, T1r, Ty, T11, TN, T1A, T18, T1l;
/* Inputs 0 and 7: difference and sum for ri (T3, Tp) and ii (T16, T1f). */
Chris@42 310 {
Chris@42 311 E T1, T2, T14, T15;
Chris@42 312 T1 = ri[0];
Chris@42 313 T2 = ri[WS(is, 7)];
Chris@42 314 T3 = T1 - T2;
Chris@42 315 Tp = T1 + T2;
Chris@42 316 T14 = ii[0];
Chris@42 317 T15 = ii[WS(is, 7)];
Chris@42 318 T16 = T14 - T15;
Chris@42 319 T1f = T14 + T15;
Chris@42 320 }
/* The next six blocks share one pattern: load two pairs of elements that
 * sit 7 apart (mod 14), form each pair's difference and sum, then combine
 * the two pairs. This block handles ri pairs 2/9 and 12/5. */
Chris@42 321 {
Chris@42 322 E T6, Tq, T9, Tr;
Chris@42 323 {
Chris@42 324 E T4, T5, T7, T8;
Chris@42 325 T4 = ri[WS(is, 2)];
Chris@42 326 T5 = ri[WS(is, 9)];
Chris@42 327 T6 = T4 - T5;
Chris@42 328 Tq = T4 + T5;
Chris@42 329 T7 = ri[WS(is, 12)];
Chris@42 330 T8 = ri[WS(is, 5)];
Chris@42 331 T9 = T7 - T8;
Chris@42 332 Tr = T7 + T8;
Chris@42 333 }
Chris@42 334 Ta = T6 + T9;
Chris@42 335 T1q = Tr - Tq;
Chris@42 336 Ts = Tq + Tr;
Chris@42 337 T10 = T9 - T6;
Chris@42 338 }
/* ii pairs 2/9 and 12/5. */
Chris@42 339 {
Chris@42 340 E TC, T1g, TF, T1h;
Chris@42 341 {
Chris@42 342 E TA, TB, TD, TE;
Chris@42 343 TA = ii[WS(is, 2)];
Chris@42 344 TB = ii[WS(is, 9)];
Chris@42 345 TC = TA - TB;
Chris@42 346 T1g = TA + TB;
Chris@42 347 TD = ii[WS(is, 12)];
Chris@42 348 TE = ii[WS(is, 5)];
Chris@42 349 TF = TD - TE;
Chris@42 350 T1h = TD + TE;
Chris@42 351 }
Chris@42 352 TG = TC - TF;
Chris@42 353 T1z = T1g - T1h;
Chris@42 354 T19 = TC + TF;
Chris@42 355 T1i = T1g + T1h;
Chris@42 356 }
/* ri pairs 4/11 and 10/3. */
Chris@42 357 {
Chris@42 358 E Td, Tt, Tg, Tu;
Chris@42 359 {
Chris@42 360 E Tb, Tc, Te, Tf;
Chris@42 361 Tb = ri[WS(is, 4)];
Chris@42 362 Tc = ri[WS(is, 11)];
Chris@42 363 Td = Tb - Tc;
Chris@42 364 Tt = Tb + Tc;
Chris@42 365 Te = ri[WS(is, 10)];
Chris@42 366 Tf = ri[WS(is, 3)];
Chris@42 367 Tg = Te - Tf;
Chris@42 368 Tu = Te + Tf;
Chris@42 369 }
Chris@42 370 Th = Td + Tg;
Chris@42 371 T1s = Tt - Tu;
Chris@42 372 Tv = Tt + Tu;
Chris@42 373 T12 = Tg - Td;
Chris@42 374 }
/* ii pairs 4/11 and 10/3. */
Chris@42 375 {
Chris@42 376 E TQ, T1m, TT, T1n;
Chris@42 377 {
Chris@42 378 E TO, TP, TR, TS;
Chris@42 379 TO = ii[WS(is, 4)];
Chris@42 380 TP = ii[WS(is, 11)];
Chris@42 381 TQ = TO - TP;
Chris@42 382 T1m = TO + TP;
Chris@42 383 TR = ii[WS(is, 10)];
Chris@42 384 TS = ii[WS(is, 3)];
Chris@42 385 TT = TR - TS;
Chris@42 386 T1n = TR + TS;
Chris@42 387 }
Chris@42 388 TU = TQ - TT;
Chris@42 389 T1B = T1n - T1m;
Chris@42 390 T17 = TQ + TT;
Chris@42 391 T1o = T1m + T1n;
Chris@42 392 }
/* ri pairs 6/13 and 8/1. */
Chris@42 393 {
Chris@42 394 E Tk, Tw, Tn, Tx;
Chris@42 395 {
Chris@42 396 E Ti, Tj, Tl, Tm;
Chris@42 397 Ti = ri[WS(is, 6)];
Chris@42 398 Tj = ri[WS(is, 13)];
Chris@42 399 Tk = Ti - Tj;
Chris@42 400 Tw = Ti + Tj;
Chris@42 401 Tl = ri[WS(is, 8)];
Chris@42 402 Tm = ri[WS(is, 1)];
Chris@42 403 Tn = Tl - Tm;
Chris@42 404 Tx = Tl + Tm;
Chris@42 405 }
Chris@42 406 To = Tk + Tn;
Chris@42 407 T1r = Tw - Tx;
Chris@42 408 Ty = Tw + Tx;
Chris@42 409 T11 = Tn - Tk;
Chris@42 410 }
/* ii pairs 6/13 and 8/1. */
Chris@42 411 {
Chris@42 412 E TJ, T1j, TM, T1k;
Chris@42 413 {
Chris@42 414 E TH, TI, TK, TL;
Chris@42 415 TH = ii[WS(is, 6)];
Chris@42 416 TI = ii[WS(is, 13)];
Chris@42 417 TJ = TH - TI;
Chris@42 418 T1j = TH + TI;
Chris@42 419 TK = ii[WS(is, 8)];
Chris@42 420 TL = ii[WS(is, 1)];
Chris@42 421 TM = TK - TL;
Chris@42 422 T1k = TK + TL;
Chris@42 423 }
Chris@42 424 TN = TJ - TM;
Chris@42 425 T1A = T1k - T1j;
Chris@42 426 T18 = TJ + TM;
Chris@42 427 T1l = T1j + T1k;
Chris@42 428 }
/* Outputs 7 and 0 are plain four-term sums: index 7 uses the pairwise
 * differences, index 0 the pairwise sums. */
Chris@42 429 ro[WS(os, 7)] = T3 + Ta + Th + To;
Chris@42 430 io[WS(os, 7)] = T16 + T19 + T17 + T18;
Chris@42 431 ro[0] = Tp + Ts + Tv + Ty;
Chris@42 432 io[0] = T1f + T1i + T1o + T1l;
/* Each remaining block writes one output pair k and 14 - k to both ro and
 * io, as the sum and the difference of two weighted combinations.
 * This block: outputs 5 and 9. */
Chris@42 433 {
Chris@42 434 E TV, Tz, T1e, T1d;
Chris@42 435 TV = FNMS(KP781831482, TN, KP974927912 * TG) - (KP433883739 * TU);
Chris@42 436 Tz = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta);
Chris@42 437 ro[WS(os, 5)] = Tz - TV;
Chris@42 438 ro[WS(os, 9)] = Tz + TV;
Chris@42 439 T1e = FNMS(KP781831482, T11, KP974927912 * T10) - (KP433883739 * T12);
Chris@42 440 T1d = FMA(KP623489801, T18, T16) + FNMA(KP900968867, T17, KP222520933 * T19);
Chris@42 441 io[WS(os, 5)] = T1d - T1e;
Chris@42 442 io[WS(os, 9)] = T1e + T1d;
Chris@42 443 }
/* Outputs 1 and 13. */
Chris@42 444 {
Chris@42 445 E TX, TW, T1b, T1c;
Chris@42 446 TX = FMA(KP781831482, TG, KP974927912 * TU) + (KP433883739 * TN);
Chris@42 447 TW = FMA(KP623489801, Ta, T3) + FNMA(KP900968867, To, KP222520933 * Th);
Chris@42 448 ro[WS(os, 13)] = TW - TX;
Chris@42 449 ro[WS(os, 1)] = TW + TX;
Chris@42 450 T1b = FMA(KP781831482, T10, KP974927912 * T12) + (KP433883739 * T11);
Chris@42 451 T1c = FMA(KP623489801, T19, T16) + FNMA(KP900968867, T18, KP222520933 * T17);
Chris@42 452 io[WS(os, 1)] = T1b + T1c;
Chris@42 453 io[WS(os, 13)] = T1c - T1b;
Chris@42 454 }
/* Outputs 3 and 11. */
Chris@42 455 {
Chris@42 456 E TZ, TY, T13, T1a;
Chris@42 457 TZ = FMA(KP433883739, TG, KP974927912 * TN) - (KP781831482 * TU);
Chris@42 458 TY = FMA(KP623489801, Th, T3) + FNMA(KP222520933, To, KP900968867 * Ta);
Chris@42 459 ro[WS(os, 11)] = TY - TZ;
Chris@42 460 ro[WS(os, 3)] = TY + TZ;
Chris@42 461 T13 = FMA(KP433883739, T10, KP974927912 * T11) - (KP781831482 * T12);
Chris@42 462 T1a = FMA(KP623489801, T17, T16) + FNMA(KP222520933, T18, KP900968867 * T19);
Chris@42 463 io[WS(os, 3)] = T13 + T1a;
Chris@42 464 io[WS(os, 11)] = T1a - T13;
Chris@42 465 }
/* Outputs 6 and 8. */
Chris@42 466 {
Chris@42 467 E T1t, T1p, T1C, T1y;
Chris@42 468 T1t = FNMS(KP433883739, T1r, KP781831482 * T1q) - (KP974927912 * T1s);
Chris@42 469 T1p = FMA(KP623489801, T1i, T1f) + FNMA(KP900968867, T1l, KP222520933 * T1o);
Chris@42 470 io[WS(os, 6)] = T1p - T1t;
Chris@42 471 io[WS(os, 8)] = T1t + T1p;
Chris@42 472 T1C = FNMS(KP433883739, T1A, KP781831482 * T1z) - (KP974927912 * T1B);
Chris@42 473 T1y = FMA(KP623489801, Ts, Tp) + FNMA(KP900968867, Ty, KP222520933 * Tv);
Chris@42 474 ro[WS(os, 6)] = T1y - T1C;
Chris@42 475 ro[WS(os, 8)] = T1y + T1C;
Chris@42 476 }
/* Outputs 4 and 10. */
Chris@42 477 {
Chris@42 478 E T1v, T1u, T1E, T1D;
Chris@42 479 T1v = FMA(KP433883739, T1q, KP781831482 * T1s) - (KP974927912 * T1r);
Chris@42 480 T1u = FMA(KP623489801, T1o, T1f) + FNMA(KP222520933, T1l, KP900968867 * T1i);
Chris@42 481 io[WS(os, 4)] = T1u - T1v;
Chris@42 482 io[WS(os, 10)] = T1v + T1u;
Chris@42 483 T1E = FMA(KP433883739, T1z, KP781831482 * T1B) - (KP974927912 * T1A);
Chris@42 484 T1D = FMA(KP623489801, Tv, Tp) + FNMA(KP222520933, Ty, KP900968867 * Ts);
Chris@42 485 ro[WS(os, 4)] = T1D - T1E;
Chris@42 486 ro[WS(os, 10)] = T1D + T1E;
Chris@42 487 }
/* Outputs 2 and 12. */
Chris@42 488 {
Chris@42 489 E T1w, T1x, T1G, T1F;
Chris@42 490 T1w = FMA(KP974927912, T1q, KP433883739 * T1s) + (KP781831482 * T1r);
Chris@42 491 T1x = FMA(KP623489801, T1l, T1f) + FNMA(KP900968867, T1o, KP222520933 * T1i);
Chris@42 492 io[WS(os, 2)] = T1w + T1x;
Chris@42 493 io[WS(os, 12)] = T1x - T1w;
Chris@42 494 T1G = FMA(KP974927912, T1z, KP433883739 * T1B) + (KP781831482 * T1A);
Chris@42 495 T1F = FMA(KP623489801, Ty, Tp) + FNMA(KP900968867, Tv, KP222520933 * Ts);
Chris@42 496 ro[WS(os, 12)] = T1F - T1G;
Chris@42 497 ro[WS(os, 2)] = T1F + T1G;
Chris@42 498 }
Chris@42 499 }
Chris@42 500 }
Chris@42 501 }
Chris@42 502
/* Descriptor passed to X(kdft_register) for the non-FMA kernel. The leading
 * 14 and "n1_14" echo the generator's -n and -name options; {100, 24, 48, 0}
 * starts with the add / multiply / fused multiply-add counts quoted in this
 * variant's header comment. The meaning of the remaining fields is not
 * visible in this file. */
Chris@42 503 static const kdft_desc desc = { 14, "n1_14", {100, 24, 48, 0}, &GENUS, 0, 0, 0, 0 };
Chris@42 504
/* Registration entry point for the non-FMA build: hands the n1_14 kernel
 * and its descriptor to X(kdft_register) for planner p. */
Chris@42 505 void X(codelet_n1_14) (planner *p) {
Chris@42 506 X(kdft_register) (p, n1_14, &desc);
Chris@42 507 }
Chris@42 508
Chris@42 509 #endif /* HAVE_FMA */