annotate src/fftw-3.3.3/dft/scalar/codelets/n1_11.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:35:43 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 11 -name n1_11 -include n.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 140 FP additions, 110 FP multiplications,
Chris@10 32 * (or, 30 additions, 0 multiplications, 110 fused multiply/add),
Chris@10 33 * 84 stack variables, 10 constants, and 44 memory accesses
Chris@10 34 */
Chris@10 35 #include "n.h"
Chris@10 36
/*
 * n1_11, HAVE_FMA variant: generated size-11 codelet (genfft gen_notw, -n 11).
 *
 * Parameters:
 *   ri, ii   read-only input arrays; element k is read as ri[WS(is, k)] and
 *            ii[WS(is, k)] for k = 0..10 (presumably the real and imaginary
 *            parts of the input -- inferred from the names, confirm in n.h).
 *   ro, io   output arrays; element k is written as ro[WS(os, k)] and
 *            io[WS(os, k)] for k = 0..10.
 *   is, os   input and output strides, only ever used through WS().
 *   v        number of loop iterations; nothing is done when v <= 0.
 *   ivs, ovs added to ri/ii and to ro/io respectively after every iteration.
 *
 * The body is a straight-line network of FMA()/FNMS() macro calls. R, E, INT,
 * stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are all defined outside
 * this file. The statement order was chosen by the generator
 * (-reorder-insns -schedule-for-pipeline); this file is marked DO NOT EDIT.
 */
Chris@10 37 static void n1_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 38 {
/* Ten numeric constants, declared through DK, that feed the FMA/FNMS chains below. */
Chris@10 39 DK(KP989821441, +0.989821441880932732376092037776718787376519372);
Chris@10 40 DK(KP959492973, +0.959492973614497389890368057066327699062454848);
Chris@10 41 DK(KP918985947, +0.918985947228994779780736114132655398124909697);
Chris@10 42 DK(KP876768831, +0.876768831002589333891339807079336796764054852);
Chris@10 43 DK(KP830830026, +0.830830026003772851058548298459246407048009821);
Chris@10 44 DK(KP778434453, +0.778434453334651800608337670740821884709317477);
Chris@10 45 DK(KP715370323, +0.715370323453429719112414662767260662417897278);
Chris@10 46 DK(KP634356270, +0.634356270682424498893150776899916060542806975);
Chris@10 47 DK(KP342584725, +0.342584725681637509502641509861112333758894680);
Chris@10 48 DK(KP521108558, +0.521108558113202722944698153526659300680427422);
Chris@10 49 {
Chris@10 50 INT i;
/* One pass per iteration: i counts down from v while the four array pointers step by ivs / ovs. */
Chris@10 51 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(44, is), MAKE_VOLATILE_STRIDE(44, os)) {
/* These eight temporaries are declared at loop scope because they are still needed after the inner block closes. */
Chris@10 52 E T1, TA, T1p, T1y, T19, T1d, T1a, T1e;
Chris@10 53 {
Chris@10 54 E T1f, T1u, T4, T1q, Tg, T1t, T7, T1s, Ta, Td, T1r, TP, T1X, T26, Ti;
Chris@10 55 E TG, T1O, T1w, TY, T1F, T17, To, T1i, T1k, T1h, Tr, T1j, Tu, T1g, Tx;
Chris@10 56 E T21, TU, TL, TC, T1S, T1J, T1m, T12, T1z, T1b;
/* Element 0 of each input array. */
Chris@10 57 T1 = ri[0];
Chris@10 58 T1f = ii[0];
Chris@10 59 {
Chris@10 60 E T1E, T16, Tb, Tc, Tv, Tw;
Chris@10 61 {
Chris@10 62 E T2, T3, Te, Tf;
/* Read ri[1..10] in pairs (k, 11-k); the sum and the difference of each pair are formed here or a few lines further down. */
Chris@10 63 T2 = ri[WS(is, 1)];
Chris@10 64 T3 = ri[WS(is, 10)];
Chris@10 65 Te = ri[WS(is, 5)];
Chris@10 66 Tf = ri[WS(is, 6)];
Chris@10 67 {
Chris@10 68 E T5, T6, T8, T9;
Chris@10 69 T5 = ri[WS(is, 2)];
Chris@10 70 T1u = T3 - T2;
Chris@10 71 T4 = T2 + T3;
Chris@10 72 T1q = Tf - Te;
Chris@10 73 Tg = Te + Tf;
Chris@10 74 T6 = ri[WS(is, 9)];
Chris@10 75 T8 = ri[WS(is, 3)];
Chris@10 76 T9 = ri[WS(is, 8)];
Chris@10 77 Tb = ri[WS(is, 4)];
Chris@10 78 T1t = T6 - T5;
Chris@10 79 T7 = T5 + T6;
Chris@10 80 T1s = T9 - T8;
Chris@10 81 Ta = T8 + T9;
Chris@10 82 Tc = ri[WS(is, 7)];
Chris@10 83 }
Chris@10 84 }
Chris@10 85 {
Chris@10 86 E T25, Th, T1W, TO;
Chris@10 87 T25 = FMA(KP521108558, T1q, T1u);
Chris@10 88 T1W = FMA(KP521108558, T1s, T1q);
Chris@10 89 TO = FNMS(KP342584725, T4, Ta);
Chris@10 90 Th = FNMS(KP342584725, Ta, T7);
Chris@10 91 Td = Tb + Tc;
Chris@10 92 T1r = Tc - Tb;
Chris@10 93 TP = FNMS(KP634356270, TO, Tg);
Chris@10 94 T1X = FNMS(KP715370323, T1W, T1t);
Chris@10 95 T26 = FMA(KP715370323, T25, T1r);
Chris@10 96 {
Chris@10 97 E TF, T1N, T1v, TX;
Chris@10 98 TF = FNMS(KP342584725, Td, T4);
Chris@10 99 Ti = FNMS(KP634356270, Th, Td);
Chris@10 100 T1N = FNMS(KP521108558, T1t, T1r);
Chris@10 101 T1v = FNMS(KP521108558, T1u, T1t);
Chris@10 102 TG = FNMS(KP634356270, TF, T7);
Chris@10 103 TX = FNMS(KP342584725, T7, Tg);
Chris@10 104 T1O = FMA(KP715370323, T1N, T1q);
Chris@10 105 T1w = FNMS(KP715370323, T1v, T1s);
Chris@10 106 T1E = FMA(KP521108558, T1r, T1s);
Chris@10 107 TY = FNMS(KP634356270, TX, T4);
Chris@10 108 T16 = FNMS(KP342584725, Tg, Td);
Chris@10 109 }
Chris@10 110 }
Chris@10 111 {
Chris@10 112 E Ty, Tz, Tm, Tn;
/* Same pairing for ii[1..10]: loads are interleaved with arithmetic on the ri values already in hand. */
Chris@10 113 Tm = ii[WS(is, 3)];
Chris@10 114 T1F = FMA(KP715370323, T1E, T1u);
Chris@10 115 Tn = ii[WS(is, 8)];
Chris@10 116 T17 = FNMS(KP634356270, T16, Ta);
Chris@10 117 Ty = ii[WS(is, 5)];
Chris@10 118 Tz = ii[WS(is, 6)];
Chris@10 119 To = Tm - Tn;
Chris@10 120 T1i = Tm + Tn;
Chris@10 121 {
Chris@10 122 E Tp, Tq, Ts, Tt;
Chris@10 123 Tp = ii[WS(is, 2)];
Chris@10 124 T1k = Ty + Tz;
Chris@10 125 TA = Ty - Tz;
Chris@10 126 Tq = ii[WS(is, 9)];
Chris@10 127 Ts = ii[WS(is, 4)];
Chris@10 128 Tt = ii[WS(is, 7)];
Chris@10 129 Tv = ii[WS(is, 1)];
Chris@10 130 T1h = Tp + Tq;
Chris@10 131 Tr = Tp - Tq;
Chris@10 132 T1j = Ts + Tt;
Chris@10 133 Tu = Ts - Tt;
Chris@10 134 Tw = ii[WS(is, 10)];
Chris@10 135 }
Chris@10 136 }
Chris@10 137 {
Chris@10 138 E TB, T1R, T20, TK, TT, T1I, T1l;
Chris@10 139 T20 = FNMS(KP342584725, T1i, T1h);
Chris@10 140 TK = FMA(KP521108558, To, TA);
Chris@10 141 TT = FNMS(KP521108558, Tr, Tu);
Chris@10 142 T1g = Tv + Tw;
Chris@10 143 Tx = Tv - Tw;
Chris@10 144 T21 = FNMS(KP634356270, T20, T1j);
Chris@10 145 TU = FMA(KP715370323, TT, TA);
Chris@10 146 TL = FNMS(KP715370323, TK, Tr);
Chris@10 147 TB = FMA(KP521108558, TA, Tx);
Chris@10 148 T1R = FNMS(KP342584725, T1j, T1g);
Chris@10 149 T1I = FNMS(KP342584725, T1g, T1i);
Chris@10 150 T1l = FNMS(KP342584725, T1k, T1j);
Chris@10 151 TC = FMA(KP715370323, TB, Tu);
Chris@10 152 T1S = FNMS(KP634356270, T1R, T1h);
Chris@10 153 T1J = FNMS(KP634356270, T1I, T1k);
Chris@10 154 T1m = FNMS(KP634356270, T1l, T1i);
Chris@10 155 T12 = FMA(KP521108558, Tu, To);
Chris@10 156 T1z = FNMS(KP342584725, T1h, T1k);
Chris@10 157 T1b = FNMS(KP521108558, Tx, Tr);
Chris@10 158 }
Chris@10 159 }
Chris@10 160 {
Chris@10 161 E T13, T1A, T1c, T1Z, T1V, TH, TM, Tj, TD;
/* Output 0 of each array is the plain sum of that array's eleven inputs (element 0 plus the five pair sums). */
Chris@10 162 ro[0] = T1 + T4 + T7 + Ta + Td + Tg;
Chris@10 163 T13 = FMA(KP715370323, T12, Tx);
Chris@10 164 T1A = FNMS(KP634356270, T1z, T1g);
Chris@10 165 T1c = FNMS(KP715370323, T1b, To);
Chris@10 166 io[0] = T1f + T1g + T1h + T1i + T1j + T1k;
Chris@10 167 Tj = FNMS(KP778434453, Ti, T4);
Chris@10 168 TD = FMA(KP830830026, TC, Tr);
Chris@10 169 {
Chris@10 170 E TE, T23, T28, Tl, Tk, T22, T27;
Chris@10 171 T22 = FNMS(KP778434453, T21, T1g);
Chris@10 172 T27 = FMA(KP830830026, T26, T1t);
Chris@10 173 Tk = FNMS(KP876768831, Tj, Tg);
Chris@10 174 TE = FMA(KP918985947, TD, To);
Chris@10 175 T23 = FNMS(KP876768831, T22, T1k);
Chris@10 176 T28 = FMA(KP918985947, T27, T1s);
Chris@10 177 Tl = FNMS(KP959492973, Tk, T1);
Chris@10 178 {
Chris@10 179 E T1U, T1T, T24, T1Y;
Chris@10 180 T1T = FNMS(KP778434453, T1S, T1k);
Chris@10 181 T24 = FNMS(KP959492973, T23, T1f);
Chris@10 182 T1Y = FMA(KP830830026, T1X, T1u);
/* From here on, outputs k and 11-k are stored as an FMA/FNMS pair over the same two operands and KP989821441. */
Chris@10 183 ro[WS(os, 1)] = FMA(KP989821441, TE, Tl);
Chris@10 184 ro[WS(os, 10)] = FNMS(KP989821441, TE, Tl);
Chris@10 185 T1U = FNMS(KP876768831, T1T, T1i);
Chris@10 186 io[WS(os, 10)] = FNMS(KP989821441, T28, T24);
Chris@10 187 io[WS(os, 1)] = FMA(KP989821441, T28, T24);
Chris@10 188 T1Z = FNMS(KP918985947, T1Y, T1r);
Chris@10 189 T1V = FNMS(KP959492973, T1U, T1f);
Chris@10 190 }
Chris@10 191 TH = FNMS(KP778434453, TG, Tg);
Chris@10 192 TM = FMA(KP830830026, TL, Tx);
Chris@10 193 }
Chris@10 194 {
Chris@10 195 E T1M, TZ, T14, T1Q;
Chris@10 196 {
Chris@10 197 E TN, TR, TV, TJ, TI, TQ, T1P;
Chris@10 198 TQ = FNMS(KP778434453, TP, Td);
Chris@10 199 io[WS(os, 9)] = FMA(KP989821441, T1Z, T1V);
Chris@10 200 io[WS(os, 2)] = FNMS(KP989821441, T1Z, T1V);
Chris@10 201 TI = FNMS(KP876768831, TH, Ta);
Chris@10 202 TN = FNMS(KP918985947, TM, Tu);
Chris@10 203 TR = FNMS(KP876768831, TQ, T7);
Chris@10 204 TV = FNMS(KP830830026, TU, To);
Chris@10 205 TJ = FNMS(KP959492973, TI, T1);
Chris@10 206 {
Chris@10 207 E T1L, TS, TW, T1K;
Chris@10 208 T1K = FNMS(KP778434453, T1J, T1j);
Chris@10 209 TS = FNMS(KP959492973, TR, T1);
Chris@10 210 TW = FNMS(KP918985947, TV, Tx);
Chris@10 211 ro[WS(os, 9)] = FMA(KP989821441, TN, TJ);
Chris@10 212 ro[WS(os, 2)] = FNMS(KP989821441, TN, TJ);
Chris@10 213 T1L = FNMS(KP876768831, T1K, T1h);
Chris@10 214 ro[WS(os, 3)] = FMA(KP989821441, TW, TS);
Chris@10 215 ro[WS(os, 8)] = FNMS(KP989821441, TW, TS);
Chris@10 216 T1P = FNMS(KP830830026, T1O, T1s);
Chris@10 217 T1M = FNMS(KP959492973, T1L, T1f);
Chris@10 218 }
Chris@10 219 TZ = FNMS(KP778434453, TY, Ta);
Chris@10 220 T14 = FNMS(KP830830026, T13, TA);
Chris@10 221 T1Q = FNMS(KP918985947, T1P, T1u);
Chris@10 222 }
Chris@10 223 {
Chris@10 224 E T15, T11, T1C, T1G, T1B, T10;
Chris@10 225 T1B = FNMS(KP778434453, T1A, T1i);
Chris@10 226 T10 = FNMS(KP876768831, TZ, Td);
Chris@10 227 T15 = FMA(KP918985947, T14, Tr);
Chris@10 228 io[WS(os, 8)] = FNMS(KP989821441, T1Q, T1M);
Chris@10 229 io[WS(os, 3)] = FMA(KP989821441, T1Q, T1M);
Chris@10 230 T11 = FNMS(KP959492973, T10, T1);
Chris@10 231 T1C = FNMS(KP876768831, T1B, T1j);
Chris@10 232 T1G = FNMS(KP830830026, T1F, T1q);
Chris@10 233 {
Chris@10 234 E T1D, T1H, T1o, T1x, T1n, T18;
Chris@10 235 T1n = FNMS(KP778434453, T1m, T1h);
Chris@10 236 ro[WS(os, 7)] = FMA(KP989821441, T15, T11);
Chris@10 237 ro[WS(os, 4)] = FNMS(KP989821441, T15, T11);
Chris@10 238 T1D = FNMS(KP959492973, T1C, T1f);
Chris@10 239 T1H = FMA(KP918985947, T1G, T1t);
Chris@10 240 T1o = FNMS(KP876768831, T1n, T1g);
Chris@10 241 T1x = FNMS(KP830830026, T1w, T1r);
Chris@10 242 T18 = FNMS(KP778434453, T17, T7);
Chris@10 243 io[WS(os, 7)] = FMA(KP989821441, T1H, T1D);
Chris@10 244 io[WS(os, 4)] = FNMS(KP989821441, T1H, T1D);
Chris@10 245 T1p = FNMS(KP959492973, T1o, T1f);
Chris@10 246 T1y = FNMS(KP918985947, T1x, T1q);
Chris@10 247 T19 = FNMS(KP876768831, T18, T4);
Chris@10 248 T1d = FNMS(KP830830026, T1c, Tu);
Chris@10 249 }
Chris@10 250 }
Chris@10 251 }
Chris@10 252 }
Chris@10 253 }
/* Outputs 5 and 6 are finished out here, from the values the inner block left in T1p, T1y, T19 and T1d (plus T1 and TA). */
Chris@10 254 io[WS(os, 6)] = FNMS(KP989821441, T1y, T1p);
Chris@10 255 io[WS(os, 5)] = FMA(KP989821441, T1y, T1p);
Chris@10 256 T1a = FNMS(KP959492973, T19, T1);
Chris@10 257 T1e = FNMS(KP918985947, T1d, TA);
Chris@10 258 ro[WS(os, 5)] = FMA(KP989821441, T1e, T1a);
Chris@10 259 ro[WS(os, 6)] = FNMS(KP989821441, T1e, T1a);
Chris@10 260 }
Chris@10 261 }
Chris@10 262 }
Chris@10 263
/*
 * Descriptor passed to X(kdft_register) together with n1_11 (HAVE_FMA build).
 * It carries the size 11, the name "n1_11" and the counts {30, 0, 110, 0};
 * the first three counts match the "30 additions, 0 multiplications,
 * 110 fused multiply/add" tally in the generator comment for this variant.
 * The meaning of the fourth count, of &GENUS and of the four trailing zeros
 * is set by kdft_desc, which is declared outside this file.
 */
Chris@10 264 static const kdft_desc desc = { 11, "n1_11", {30, 0, 110, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 265
/*
 * Registration entry point (HAVE_FMA build): passes the n1_11 function and
 * its descriptor to X(kdft_register) for planner p. X() is a name-wrapping
 * macro defined outside this file.
 */
Chris@10 266 void X(codelet_n1_11) (planner *p) {
Chris@10 267 X(kdft_register) (p, n1_11, &desc);
Chris@10 268 }
Chris@10 269
Chris@10 270 #else /* HAVE_FMA */
Chris@10 271
Chris@10 272 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 11 -name n1_11 -include n.h */
Chris@10 273
Chris@10 274 /*
Chris@10 275 * This function contains 140 FP additions, 100 FP multiplications,
Chris@10 276 * (or, 60 additions, 20 multiplications, 80 fused multiply/add),
Chris@10 277 * 41 stack variables, 10 constants, and 44 memory accesses
Chris@10 278 */
Chris@10 279 #include "n.h"
Chris@10 280
/*
 * n1_11, non-FMA variant: generated size-11 codelet (genfft gen_notw, -n 11).
 *
 * Parameters:
 *   ri, ii   read-only input arrays; element k is read as ri[WS(is, k)] and
 *            ii[WS(is, k)] for k = 0..10 (presumably the real and imaginary
 *            parts of the input -- inferred from the names, confirm in n.h).
 *   ro, io   output arrays; element k is written as ro[WS(os, k)] and
 *            io[WS(os, k)] for k = 0..10.
 *   is, os   input and output strides, only ever used through WS().
 *   v        number of loop iterations; nothing is done when v <= 0.
 *   ivs, ovs added to ri/ii and to ro/io respectively after every iteration.
 *
 * Shape of the computation: inputs 1..10 of each array are combined in pairs
 * (k, 11-k) into a sum and a difference. Output 0 is the sum of all eleven
 * inputs. Every other output pair (k, 11-k) is the sum and difference of two
 * temporaries, each a fixed linear combination of those pair sums/differences.
 * The ten DK constants numerically match |cos(2*pi*k/11)| and sin(2*pi*k/11)
 * for k = 1..5.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS, FNMA and MAKE_VOLATILE_STRIDE are all
 * defined outside this file. This file is marked DO NOT EDIT.
 */
Chris@10 281 static void n1_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 282 {
Chris@10 283 DK(KP654860733, +0.654860733945285064056925072466293553183791199);
Chris@10 284 DK(KP142314838, +0.142314838273285140443792668616369668791051361);
Chris@10 285 DK(KP959492973, +0.959492973614497389890368057066327699062454848);
Chris@10 286 DK(KP415415013, +0.415415013001886425529274149229623203524004910);
Chris@10 287 DK(KP841253532, +0.841253532831181168861811648919367717513292498);
Chris@10 288 DK(KP989821441, +0.989821441880932732376092037776718787376519372);
Chris@10 289 DK(KP909631995, +0.909631995354518371411715383079028460060241051);
Chris@10 290 DK(KP281732556, +0.281732556841429697711417915346616899035777899);
Chris@10 291 DK(KP540640817, +0.540640817455597582107635954318691695431770608);
Chris@10 292 DK(KP755749574, +0.755749574354258283774035843972344420179717445);
Chris@10 293 {
Chris@10 294 INT i;
/* One pass per iteration: i counts down from v while the four array pointers step by ivs / ovs. */
Chris@10 295 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(44, is), MAKE_VOLATILE_STRIDE(44, os)) {
Chris@10 296 E T1, TM, T4, TG, Tk, TR, Tw, TN, T7, TK, Ta, TH, Tn, TQ, Td;
Chris@10 297 E TJ, Tq, TO, Tt, TP, Tg, TI;
Chris@10 298 {
Chris@10 299 E T2, T3, Ti, Tj;
/* Load phase: element 0 of each array, then pairs (k, 11-k) reduced to a sum and a difference. */
Chris@10 300 T1 = ri[0];
Chris@10 301 TM = ii[0];
Chris@10 302 T2 = ri[WS(is, 1)];
Chris@10 303 T3 = ri[WS(is, 10)];
Chris@10 304 T4 = T2 + T3;
Chris@10 305 TG = T3 - T2;
Chris@10 306 Ti = ii[WS(is, 1)];
Chris@10 307 Tj = ii[WS(is, 10)];
Chris@10 308 Tk = Ti - Tj;
Chris@10 309 TR = Ti + Tj;
Chris@10 310 {
Chris@10 311 E Tu, Tv, T5, T6;
Chris@10 312 Tu = ii[WS(is, 2)];
Chris@10 313 Tv = ii[WS(is, 9)];
Chris@10 314 Tw = Tu - Tv;
Chris@10 315 TN = Tu + Tv;
Chris@10 316 T5 = ri[WS(is, 2)];
Chris@10 317 T6 = ri[WS(is, 9)];
Chris@10 318 T7 = T5 + T6;
Chris@10 319 TK = T6 - T5;
Chris@10 320 }
Chris@10 321 }
Chris@10 322 {
Chris@10 323 E T8, T9, To, Tp;
Chris@10 324 T8 = ri[WS(is, 3)];
Chris@10 325 T9 = ri[WS(is, 8)];
Chris@10 326 Ta = T8 + T9;
Chris@10 327 TH = T9 - T8;
Chris@10 328 {
Chris@10 329 E Tl, Tm, Tb, Tc;
Chris@10 330 Tl = ii[WS(is, 3)];
Chris@10 331 Tm = ii[WS(is, 8)];
Chris@10 332 Tn = Tl - Tm;
Chris@10 333 TQ = Tl + Tm;
Chris@10 334 Tb = ri[WS(is, 4)];
Chris@10 335 Tc = ri[WS(is, 7)];
Chris@10 336 Td = Tb + Tc;
Chris@10 337 TJ = Tc - Tb;
Chris@10 338 }
Chris@10 339 To = ii[WS(is, 4)];
Chris@10 340 Tp = ii[WS(is, 7)];
Chris@10 341 Tq = To - Tp;
Chris@10 342 TO = To + Tp;
Chris@10 343 {
Chris@10 344 E Tr, Ts, Te, Tf;
Chris@10 345 Tr = ii[WS(is, 5)];
Chris@10 346 Ts = ii[WS(is, 6)];
Chris@10 347 Tt = Tr - Ts;
Chris@10 348 TP = Tr + Ts;
Chris@10 349 Te = ri[WS(is, 5)];
Chris@10 350 Tf = ri[WS(is, 6)];
Chris@10 351 Tg = Te + Tf;
Chris@10 352 TI = Tf - Te;
Chris@10 353 }
Chris@10 354 }
Chris@10 355 {
Chris@10 356 E Tx, Th, TZ, T10;
/* Output 0 of each array: element 0 plus the five pair sums, i.e. the sum of all eleven inputs. */
Chris@10 357 ro[0] = T1 + T4 + T7 + Ta + Td + Tg;
Chris@10 358 io[0] = TM + TR + TN + TQ + TO + TP;
/* Outputs 4 and 7. For ro the two temporaries mix the ri pair sums (with T1) and the ii pair differences; for io they mix the ii pair sums (with TM) and the ri pair differences. The same pattern repeats for every pair below. */
Chris@10 359 Tx = FMA(KP755749574, Tk, KP540640817 * Tn) + FNMS(KP909631995, Tt, KP281732556 * Tq) - (KP989821441 * Tw);
Chris@10 360 Th = FMA(KP841253532, Ta, T1) + FNMS(KP959492973, Td, KP415415013 * Tg) + FNMA(KP142314838, T7, KP654860733 * T4);
Chris@10 361 ro[WS(os, 7)] = Th - Tx;
Chris@10 362 ro[WS(os, 4)] = Th + Tx;
Chris@10 363 TZ = FMA(KP755749574, TG, KP540640817 * TH) + FNMS(KP909631995, TI, KP281732556 * TJ) - (KP989821441 * TK);
Chris@10 364 T10 = FMA(KP841253532, TQ, TM) + FNMS(KP959492973, TO, KP415415013 * TP) + FNMA(KP142314838, TN, KP654860733 * TR);
Chris@10 365 io[WS(os, 4)] = TZ + T10;
Chris@10 366 io[WS(os, 7)] = T10 - TZ;
Chris@10 367 {
Chris@10 368 E TX, TY, Tz, Ty;
/* Outputs 2 and 9. */
Chris@10 369 TX = FMA(KP909631995, TG, KP755749574 * TK) + FNMA(KP540640817, TI, KP989821441 * TJ) - (KP281732556 * TH);
Chris@10 370 TY = FMA(KP415415013, TR, TM) + FNMS(KP142314838, TO, KP841253532 * TP) + FNMA(KP959492973, TQ, KP654860733 * TN);
Chris@10 371 io[WS(os, 2)] = TX + TY;
Chris@10 372 io[WS(os, 9)] = TY - TX;
Chris@10 373 Tz = FMA(KP909631995, Tk, KP755749574 * Tw) + FNMA(KP540640817, Tt, KP989821441 * Tq) - (KP281732556 * Tn);
Chris@10 374 Ty = FMA(KP415415013, T4, T1) + FNMS(KP142314838, Td, KP841253532 * Tg) + FNMA(KP959492973, Ta, KP654860733 * T7);
Chris@10 375 ro[WS(os, 9)] = Ty - Tz;
Chris@10 376 ro[WS(os, 2)] = Ty + Tz;
Chris@10 377 }
Chris@10 378 }
Chris@10 379 {
Chris@10 380 E TB, TA, TT, TU;
/* Outputs 1 and 10. */
Chris@10 381 TB = FMA(KP540640817, Tk, KP909631995 * Tw) + FMA(KP989821441, Tn, KP755749574 * Tq) + (KP281732556 * Tt);
Chris@10 382 TA = FMA(KP841253532, T4, T1) + FNMS(KP959492973, Tg, KP415415013 * T7) + FNMA(KP654860733, Td, KP142314838 * Ta);
Chris@10 383 ro[WS(os, 10)] = TA - TB;
Chris@10 384 ro[WS(os, 1)] = TA + TB;
Chris@10 385 {
Chris@10 386 E TV, TW, TD, TC;
Chris@10 387 TV = FMA(KP540640817, TG, KP909631995 * TK) + FMA(KP989821441, TH, KP755749574 * TJ) + (KP281732556 * TI);
Chris@10 388 TW = FMA(KP841253532, TR, TM) + FNMS(KP959492973, TP, KP415415013 * TN) + FNMA(KP654860733, TO, KP142314838 * TQ);
Chris@10 389 io[WS(os, 1)] = TV + TW;
Chris@10 390 io[WS(os, 10)] = TW - TV;
/* Outputs 3 and 8 (ro here, io just after this inner block). */
Chris@10 391 TD = FMA(KP989821441, Tk, KP540640817 * Tq) + FNMS(KP909631995, Tn, KP755749574 * Tt) - (KP281732556 * Tw);
Chris@10 392 TC = FMA(KP415415013, Ta, T1) + FNMS(KP654860733, Tg, KP841253532 * Td) + FNMA(KP959492973, T7, KP142314838 * T4);
Chris@10 393 ro[WS(os, 8)] = TC - TD;
Chris@10 394 ro[WS(os, 3)] = TC + TD;
Chris@10 395 }
Chris@10 396 TT = FMA(KP989821441, TG, KP540640817 * TJ) + FNMS(KP909631995, TH, KP755749574 * TI) - (KP281732556 * TK);
Chris@10 397 TU = FMA(KP415415013, TQ, TM) + FNMS(KP654860733, TP, KP841253532 * TO) + FNMA(KP959492973, TN, KP142314838 * TR);
Chris@10 398 io[WS(os, 3)] = TT + TU;
Chris@10 399 io[WS(os, 8)] = TU - TT;
Chris@10 400 {
Chris@10 401 E TL, TS, TF, TE;
/* Outputs 5 and 6. */
Chris@10 402 TL = FMA(KP281732556, TG, KP755749574 * TH) + FNMS(KP909631995, TJ, KP989821441 * TI) - (KP540640817 * TK);
Chris@10 403 TS = FMA(KP841253532, TN, TM) + FNMS(KP142314838, TP, KP415415013 * TO) + FNMA(KP654860733, TQ, KP959492973 * TR);
Chris@10 404 io[WS(os, 5)] = TL + TS;
Chris@10 405 io[WS(os, 6)] = TS - TL;
Chris@10 406 TF = FMA(KP281732556, Tk, KP755749574 * Tn) + FNMS(KP909631995, Tq, KP989821441 * Tt) - (KP540640817 * Tw);
Chris@10 407 TE = FMA(KP841253532, T7, T1) + FNMS(KP142314838, Tg, KP415415013 * Td) + FNMA(KP654860733, Ta, KP959492973 * T4);
Chris@10 408 ro[WS(os, 6)] = TE - TF;
Chris@10 409 ro[WS(os, 5)] = TE + TF;
Chris@10 410 }
Chris@10 411 }
Chris@10 412 }
Chris@10 413 }
Chris@10 414 }
Chris@10 415
/*
 * Descriptor passed to X(kdft_register) together with n1_11 (non-FMA build).
 * It carries the size 11, the name "n1_11" and the counts {60, 20, 80, 0};
 * the first three counts match the "60 additions, 20 multiplications,
 * 80 fused multiply/add" tally in the generator comment for this variant.
 * The meaning of the fourth count, of &GENUS and of the four trailing zeros
 * is set by kdft_desc, which is declared outside this file.
 */
Chris@10 416 static const kdft_desc desc = { 11, "n1_11", {60, 20, 80, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 417
/*
 * Registration entry point (non-FMA build): passes the n1_11 function and
 * its descriptor to X(kdft_register) for planner p. X() is a name-wrapping
 * macro defined outside this file.
 */
Chris@10 418 void X(codelet_n1_11) (planner *p) {
Chris@10 419 X(kdft_register) (p, n1_11, &desc);
Chris@10 420 }
Chris@10 421
Chris@10 422 #endif /* HAVE_FMA */